{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0c311dc3",
   "metadata": {},
   "source": [
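    "# Use Categorical DQN to Play Pong-v4\n",
    "\n",
    "PyTorch version. A Categorical DQN agent (51 value atoms) is trained on `PongNoFrameskip-v4` with Atari preprocessing and 4 stacked frames."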
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "43ee70df",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "from torch import nn\n",
    "from torch import optim\n",
    "\n",
    "# Send records of level DEBUG and above to stdout with an HH:MM:SS prefix.\n",
    "logging.basicConfig(\n",
    "    level=logging.DEBUG,\n",
    "    stream=sys.stdout,\n",
    "    datefmt='%H:%M:%S',\n",
    "    format='%(asctime)s [%(levelname)s] %(message)s',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb3b026c",
   "metadata": {},
   "source": [
    "## Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4abedbed",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:37:24 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "11:37:24 [INFO] action_space: Discrete(6)\n",
      "11:37:24 [INFO] observation_space: : Box([[[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]], [[[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]], (4, 84, 84), uint8)\n",
      "11:37:24 [INFO] reward_range: (-inf, inf)\n",
      "11:37:24 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "11:37:24 [INFO] num_stack: 4\n",
      "11:37:24 [INFO] lz4_compress: False\n",
      "11:37:24 [INFO] frames: deque([], maxlen=4)\n",
      "11:37:24 [INFO] id: PongNoFrameskip-v4\n",
      "11:37:24 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "11:37:24 [INFO] reward_threshold: None\n",
      "11:37:24 [INFO] nondeterministic: False\n",
      "11:37:24 [INFO] max_episode_steps: 400000\n",
      "11:37:24 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "11:37:24 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "# Pong without emulator frame skipping, wrapped with Atari preprocessing\n",
    "# and a stack of the 4 most recent frames.\n",
    "env = gym.make('PongNoFrameskip-v4')\n",
    "env = AtariPreprocessing(env)\n",
    "env = FrameStack(env, num_stack=4)\n",
    "# env.unwrapped is the innermost AtariEnv beneath all wrappers.\n",
    "env.unwrapped.np_random.seed(0) # set seed for noops\n",
    "env.unwrapped.seed(0) # set seed for AtariEnv\n",
    "for key, value in vars(env).items():\n",
    "    logging.info('%s: %s', key, value)\n",
    "for key, value in vars(env.spec).items():\n",
    "    logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "192626a0",
   "metadata": {},
   "source": [
    "## Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "47f918f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    \"\"\"Fixed-capacity ring buffer of transitions, stored in a DataFrame.\"\"\"\n",
    "\n",
    "    def __init__(self, capacity):\n",
    "        \"\"\"Allocate `capacity` empty rows, one column per transition field.\"\"\"\n",
    "        self.i = 0  # row that the next store() call writes to\n",
    "        self.count = 0  # number of rows holding data, at most `capacity`\n",
    "        self.capacity = capacity\n",
    "        fields = ['state', 'action', 'reward', 'next_state', 'done']\n",
    "        self.memory = pd.DataFrame(index=range(capacity), columns=fields)\n",
    "\n",
    "    def store(self, *args):\n",
    "        \"\"\"Write one transition at the cursor and advance it, wrapping around.\n",
    "\n",
    "        `args` holds the values for the columns, in column order. Once the\n",
    "        buffer is full the oldest row is overwritten.\n",
    "        \"\"\"\n",
    "        slot = self.i\n",
    "        self.memory.loc[slot] = args\n",
    "        self.i = (slot + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Draw `size` stored rows uniformly at random, with replacement.\n",
    "\n",
    "        Returns a lazy iterable that yields one stacked array per column, in\n",
    "        column order, so it can be unpacked into five names.\n",
    "        \"\"\"\n",
    "        picked = np.random.choice(self.count, size=size)\n",
    "        table = self.memory\n",
    "        return (np.stack(table.loc[picked, column])\n",
    "                for column in table.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class CategoricalDQNAgent:\n",
    "    \"\"\"Categorical DQN agent that models a return distribution per action.\n",
    "\n",
    "    The network emits ``action_n * atom_count`` logits. A softmax over the\n",
    "    atom axis yields one probability vector per action, and an action's\n",
    "    value is the expectation of the fixed atom values under that vector.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        \"\"\"Build the networks, optimizer and replay memory.\n",
    "\n",
    "        Only `env.action_space.n` is read from `env`.\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1. # exploration\n",
    "\n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "        self.learn_start = 1024 # stored transitions needed before learning\n",
    "        self.learn_interval = 10 # stored transitions between two updates\n",
    "        # Counts every stored transition. Unlike replayer.count it keeps\n",
    "        # growing after the buffer is full, so it can pace the updates.\n",
    "        self.store_count = 0\n",
    "\n",
    "        self.atom_count = 51\n",
    "        self.atom_min = -10.\n",
    "        self.atom_max = 10.\n",
    "        self.atom_difference = ((self.atom_max - self.atom_min)\n",
    "                / (self.atom_count - 1))\n",
    "        self.atom_tensor = torch.linspace(self.atom_min, self.atom_max,\n",
    "                self.atom_count)\n",
    "\n",
    "        self.evaluate_net = nn.Sequential(\n",
    "                nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),\n",
    "                nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),\n",
    "                nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),\n",
    "                nn.Flatten(),\n",
    "                nn.Linear(3136, 512), nn.ReLU(inplace=True),\n",
    "                nn.Linear(512, self.action_n * self.atom_count))\n",
    "        self.target_net = copy.deepcopy(self.evaluate_net)\n",
    "        self.optimizer = optim.Adam(self.evaluate_net.parameters(), lr=0.0001)\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Start an episode; `mode='train'` enables exploration and learning.\"\"\"\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Choose an action for `observation`.\n",
    "\n",
    "        `reward` and `done` describe the transition that led to\n",
    "        `observation`. In train mode the action is epsilon-greedy, the\n",
    "        previous transition is stored, and `learn()` runs once per\n",
    "        `learn_interval` stored transitions after at least `learn_start`\n",
    "        of them are in the buffer.\n",
    "\n",
    "        Returns the chosen action index.\n",
    "        \"\"\"\n",
    "        state_tensor = torch.as_tensor(observation,\n",
    "                dtype=torch.float).unsqueeze(0)\n",
    "        # Action selection needs no gradients, so skip graph construction.\n",
    "        with torch.no_grad():\n",
    "            logit_tensor = self.evaluate_net(state_tensor).view(-1,\n",
    "                    self.action_n, self.atom_count)\n",
    "            prob_tensor = logit_tensor.softmax(dim=-1)\n",
    "            # Expected return per action; same formula as in learn().\n",
    "            q_tensor = (prob_tensor * self.atom_tensor).sum(2)\n",
    "            action = q_tensor.argmax(dim=1).numpy()[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                # The last two records are (s, _, _, a) and (s', r, done, _).\n",
    "                state, _, _, act, next_state, reward, done, _ = (\n",
    "                        self.trajectory[-8:])\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "                self.store_count += 1\n",
    "                # Pace on store_count, not replayer.count: the latter stops\n",
    "                # at capacity (a multiple of 10), which would trigger an\n",
    "                # update on every step once the buffer is full.\n",
    "                if (self.replayer.count >= self.learn_start and\n",
    "                        self.store_count % self.learn_interval == 0):\n",
    "                    self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"Nothing to release.\"\"\"\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        \"\"\"Soft-update `target_net` towards `evaluate_net`.\n",
    "\n",
    "        Each target parameter becomes\n",
    "        ``learning_rate * evaluate + (1 - learning_rate) * target``.\n",
    "        \"\"\"\n",
    "        with torch.no_grad():\n",
    "            for target_param, evaluate_param in zip(\n",
    "                    target_net.parameters(), evaluate_net.parameters()):\n",
    "                target_param.copy_(learning_rate * evaluate_param\n",
    "                        + (1 - learning_rate) * target_param)\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Fit the evaluate net on one replayed batch and refresh the target.\n",
    "\n",
    "        The target distribution is the target network's distribution for its\n",
    "        greedy next action, with atoms scaled by gamma (zeroed when `done`),\n",
    "        shifted by the reward, clipped to the atom range and projected back\n",
    "        onto the atoms. The loss is the cross entropy between that\n",
    "        projection and the evaluate network's distribution for the action\n",
    "        taken. Each call also lowers epsilon by 1e-5, down to 0.05.\n",
    "        \"\"\"\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = (\n",
    "                self.replayer.sample(batch_size))\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(actions, dtype=torch.long)\n",
    "        reward_tensor = torch.as_tensor(rewards, dtype=torch.float)\n",
    "        done_tensor = torch.as_tensor(dones, dtype=torch.float)\n",
    "        next_state_tensor = torch.as_tensor(next_states, dtype=torch.float)\n",
    "        batch_indices = torch.arange(batch_size)\n",
    "\n",
    "        # The target is a constant for the loss: computing it without\n",
    "        # gradient tracking keeps backward() out of target_net.\n",
    "        with torch.no_grad():\n",
    "            # compute target\n",
    "            next_logit_tensor = self.target_net(next_state_tensor).view(-1,\n",
    "                    self.action_n, self.atom_count)\n",
    "            next_prob_tensor = next_logit_tensor.softmax(dim=-1)\n",
    "            next_q_tensor = (next_prob_tensor * self.atom_tensor).sum(2)\n",
    "            next_action_tensor = next_q_tensor.argmax(dim=1)\n",
    "            # (batch, 1, atom): distribution of the greedy next action\n",
    "            next_dist_tensor = next_prob_tensor[batch_indices,\n",
    "                    next_action_tensor, :].unsqueeze(1)\n",
    "\n",
    "            # project\n",
    "            target_tensor = (reward_tensor.reshape(batch_size, 1)\n",
    "                    + self.gamma * self.atom_tensor.repeat(batch_size, 1)\n",
    "                    * (1. - done_tensor).reshape(-1, 1))\n",
    "            clipped_target_tensor = target_tensor.clamp(self.atom_min,\n",
    "                    self.atom_max)\n",
    "            # (batch, atom_i, atom_j): share of shifted atom j assigned to\n",
    "            # fixed atom i, by linear interpolation between neighbours.\n",
    "            projection_tensor = (1. - (clipped_target_tensor.unsqueeze(1)\n",
    "                    - self.atom_tensor.view(1, -1, 1)).abs()\n",
    "                    / self.atom_difference).clamp(0, 1)\n",
    "            projected_tensor = (projection_tensor * next_dist_tensor).sum(-1)\n",
    "\n",
    "        logit_tensor = self.evaluate_net(state_tensor).view(-1, self.action_n,\n",
    "                self.atom_count)\n",
    "        all_q_prob_tensor = logit_tensor.softmax(dim=-1)\n",
    "        q_prob_tensor = all_q_prob_tensor[batch_indices, action_tensor, :]\n",
    "\n",
    "        # xlogy gives 0 where the projected probability is 0.\n",
    "        cross_entropy_tensor = -torch.xlogy(projected_tensor, q_prob_tensor\n",
    "                + 1e-8).sum(1)\n",
    "        loss_tensor = cross_entropy_tensor.mean()\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "\n",
    "agent = CategoricalDQNAgent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "165b9ffb",
   "metadata": {},
   "source": [
    "## Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "50e21fe7",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11:37:24 [INFO] ==== train ====\n",
      "11:37:47 [DEBUG] train episode 0: reward = -19.00, steps = 1095\n",
      "11:38:20 [DEBUG] train episode 1: reward = -20.00, steps = 945\n",
      "11:38:53 [DEBUG] train episode 2: reward = -20.00, steps = 917\n",
      "11:39:25 [DEBUG] train episode 3: reward = -21.00, steps = 879\n",
      "11:39:56 [DEBUG] train episode 4: reward = -21.00, steps = 863\n",
      "11:40:28 [DEBUG] train episode 5: reward = -20.00, steps = 837\n",
      "11:41:05 [DEBUG] train episode 6: reward = -20.00, steps = 925\n",
      "11:41:41 [DEBUG] train episode 7: reward = -20.00, steps = 966\n",
      "11:42:12 [DEBUG] train episode 8: reward = -21.00, steps = 785\n",
      "11:42:41 [DEBUG] train episode 9: reward = -21.00, steps = 757\n",
      "11:43:16 [DEBUG] train episode 10: reward = -19.00, steps = 919\n",
      "11:43:53 [DEBUG] train episode 11: reward = -20.00, steps = 960\n",
      "11:44:24 [DEBUG] train episode 12: reward = -21.00, steps = 761\n",
      "11:44:55 [DEBUG] train episode 13: reward = -21.00, steps = 816\n",
      "11:45:25 [DEBUG] train episode 14: reward = -21.00, steps = 762\n",
      "11:46:02 [DEBUG] train episode 15: reward = -20.00, steps = 943\n",
      "11:46:36 [DEBUG] train episode 16: reward = -20.00, steps = 887\n",
      "11:47:15 [DEBUG] train episode 17: reward = -21.00, steps = 996\n",
      "11:47:55 [DEBUG] train episode 18: reward = -20.00, steps = 1020\n",
      "11:48:28 [DEBUG] train episode 19: reward = -21.00, steps = 852\n",
      "11:49:11 [DEBUG] train episode 20: reward = -20.00, steps = 1098\n",
      "11:49:53 [DEBUG] train episode 21: reward = -19.00, steps = 1006\n",
      "11:50:29 [DEBUG] train episode 22: reward = -20.00, steps = 882\n",
      "11:51:03 [DEBUG] train episode 23: reward = -20.00, steps = 863\n",
      "11:51:36 [DEBUG] train episode 24: reward = -21.00, steps = 837\n",
      "11:52:14 [DEBUG] train episode 25: reward = -20.00, steps = 1004\n",
      "11:52:51 [DEBUG] train episode 26: reward = -20.00, steps = 939\n",
      "11:53:39 [DEBUG] train episode 27: reward = -19.00, steps = 1249\n",
      "11:54:15 [DEBUG] train episode 28: reward = -20.00, steps = 928\n",
      "11:54:58 [DEBUG] train episode 29: reward = -20.00, steps = 1051\n",
      "11:55:28 [DEBUG] train episode 30: reward = -21.00, steps = 757\n",
      "11:56:06 [DEBUG] train episode 31: reward = -21.00, steps = 938\n",
      "11:56:43 [DEBUG] train episode 32: reward = -19.00, steps = 938\n",
      "11:57:23 [DEBUG] train episode 33: reward = -20.00, steps = 1008\n",
      "11:57:57 [DEBUG] train episode 34: reward = -21.00, steps = 847\n",
      "11:58:34 [DEBUG] train episode 35: reward = -20.00, steps = 895\n",
      "11:59:08 [DEBUG] train episode 36: reward = -21.00, steps = 846\n",
      "11:59:43 [DEBUG] train episode 37: reward = -21.00, steps = 878\n",
      "12:00:26 [DEBUG] train episode 38: reward = -21.00, steps = 1060\n",
      "12:01:03 [DEBUG] train episode 39: reward = -20.00, steps = 922\n",
      "12:01:39 [DEBUG] train episode 40: reward = -21.00, steps = 910\n",
      "12:02:16 [DEBUG] train episode 41: reward = -20.00, steps = 917\n",
      "12:02:49 [DEBUG] train episode 42: reward = -21.00, steps = 849\n",
      "12:03:21 [DEBUG] train episode 43: reward = -21.00, steps = 791\n",
      "12:03:54 [DEBUG] train episode 44: reward = -20.00, steps = 840\n",
      "12:04:33 [DEBUG] train episode 45: reward = -21.00, steps = 973\n",
      "12:05:04 [DEBUG] train episode 46: reward = -21.00, steps = 790\n",
      "12:05:39 [DEBUG] train episode 47: reward = -21.00, steps = 883\n",
      "12:06:15 [DEBUG] train episode 48: reward = -21.00, steps = 905\n",
      "12:07:03 [DEBUG] train episode 49: reward = -18.00, steps = 1186\n",
      "12:07:43 [DEBUG] train episode 50: reward = -21.00, steps = 1032\n",
      "12:08:25 [DEBUG] train episode 51: reward = -20.00, steps = 1020\n",
      "12:08:57 [DEBUG] train episode 52: reward = -21.00, steps = 817\n",
      "12:09:32 [DEBUG] train episode 53: reward = -21.00, steps = 866\n",
      "12:10:15 [DEBUG] train episode 54: reward = -19.00, steps = 1070\n",
      "12:10:53 [DEBUG] train episode 55: reward = -19.00, steps = 933\n",
      "12:11:32 [DEBUG] train episode 56: reward = -19.00, steps = 971\n",
      "12:12:11 [DEBUG] train episode 57: reward = -20.00, steps = 983\n",
      "12:12:44 [DEBUG] train episode 58: reward = -21.00, steps = 804\n",
      "12:13:17 [DEBUG] train episode 59: reward = -21.00, steps = 823\n",
      "12:13:52 [DEBUG] train episode 60: reward = -20.00, steps = 864\n",
      "12:14:33 [DEBUG] train episode 61: reward = -20.00, steps = 1020\n",
      "12:15:09 [DEBUG] train episode 62: reward = -21.00, steps = 898\n",
      "12:15:51 [DEBUG] train episode 63: reward = -20.00, steps = 1037\n",
      "12:16:30 [DEBUG] train episode 64: reward = -19.00, steps = 974\n",
      "12:17:04 [DEBUG] train episode 65: reward = -21.00, steps = 824\n",
      "12:17:40 [DEBUG] train episode 66: reward = -21.00, steps = 867\n",
      "12:18:18 [DEBUG] train episode 67: reward = -20.00, steps = 930\n",
      "12:19:03 [DEBUG] train episode 68: reward = -20.00, steps = 1112\n",
      "12:19:38 [DEBUG] train episode 69: reward = -21.00, steps = 848\n",
      "12:20:11 [DEBUG] train episode 70: reward = -21.00, steps = 806\n",
      "12:20:44 [DEBUG] train episode 71: reward = -21.00, steps = 808\n",
      "12:21:24 [DEBUG] train episode 72: reward = -20.00, steps = 956\n",
      "12:21:59 [DEBUG] train episode 73: reward = -21.00, steps = 848\n",
      "12:22:41 [DEBUG] train episode 74: reward = -20.00, steps = 1042\n",
      "12:23:26 [DEBUG] train episode 75: reward = -18.00, steps = 1099\n",
      "12:24:00 [DEBUG] train episode 76: reward = -21.00, steps = 847\n",
      "12:24:39 [DEBUG] train episode 77: reward = -20.00, steps = 958\n",
      "12:25:27 [DEBUG] train episode 78: reward = -19.00, steps = 1016\n",
      "12:26:15 [DEBUG] train episode 79: reward = -19.00, steps = 1025\n",
      "12:26:57 [DEBUG] train episode 80: reward = -20.00, steps = 914\n",
      "12:27:37 [DEBUG] train episode 81: reward = -21.00, steps = 848\n",
      "12:28:19 [DEBUG] train episode 82: reward = -21.00, steps = 884\n",
      "12:29:04 [DEBUG] train episode 83: reward = -21.00, steps = 946\n",
      "12:30:00 [DEBUG] train episode 84: reward = -20.00, steps = 1163\n",
      "12:30:44 [DEBUG] train episode 85: reward = -20.00, steps = 948\n",
      "12:31:30 [DEBUG] train episode 86: reward = -21.00, steps = 957\n",
      "12:32:11 [DEBUG] train episode 87: reward = -21.00, steps = 876\n",
      "12:33:04 [DEBUG] train episode 88: reward = -17.00, steps = 1112\n",
      "12:33:52 [DEBUG] train episode 89: reward = -20.00, steps = 1020\n",
      "12:34:37 [DEBUG] train episode 90: reward = -21.00, steps = 947\n",
      "12:35:20 [DEBUG] train episode 91: reward = -21.00, steps = 899\n",
      "12:36:04 [DEBUG] train episode 92: reward = -19.00, steps = 930\n",
      "12:36:46 [DEBUG] train episode 93: reward = -21.00, steps = 880\n",
      "12:37:23 [DEBUG] train episode 94: reward = -21.00, steps = 792\n",
      "12:38:09 [DEBUG] train episode 95: reward = -20.00, steps = 975\n",
      "12:38:48 [DEBUG] train episode 96: reward = -21.00, steps = 824\n",
      "12:39:32 [DEBUG] train episode 97: reward = -20.00, steps = 918\n",
      "12:40:17 [DEBUG] train episode 98: reward = -20.00, steps = 956\n",
      "12:41:09 [DEBUG] train episode 99: reward = -21.00, steps = 1101\n",
      "12:41:46 [DEBUG] train episode 100: reward = -21.00, steps = 777\n",
      "12:42:25 [DEBUG] train episode 101: reward = -21.00, steps = 826\n",
      "12:43:06 [DEBUG] train episode 102: reward = -21.00, steps = 865\n",
      "12:43:52 [DEBUG] train episode 103: reward = -21.00, steps = 961\n",
      "12:44:42 [DEBUG] train episode 104: reward = -20.00, steps = 1041\n",
      "12:45:26 [DEBUG] train episode 105: reward = -20.00, steps = 929\n",
      "12:46:16 [DEBUG] train episode 106: reward = -20.00, steps = 1038\n",
      "12:47:28 [DEBUG] train episode 107: reward = -21.00, steps = 847\n",
      "12:51:20 [DEBUG] train episode 108: reward = -20.00, steps = 918\n",
      "12:55:25 [DEBUG] train episode 109: reward = -21.00, steps = 968\n",
      "12:59:56 [DEBUG] train episode 110: reward = -19.00, steps = 1066\n",
      "13:03:52 [DEBUG] train episode 111: reward = -20.00, steps = 926\n",
      "13:07:26 [DEBUG] train episode 112: reward = -20.00, steps = 842\n",
      "13:11:47 [DEBUG] train episode 113: reward = -21.00, steps = 1028\n",
      "13:15:52 [DEBUG] train episode 114: reward = -21.00, steps = 959\n",
      "13:19:13 [DEBUG] train episode 115: reward = -21.00, steps = 786\n",
      "13:24:31 [DEBUG] train episode 116: reward = -18.00, steps = 1238\n",
      "13:28:22 [DEBUG] train episode 117: reward = -21.00, steps = 909\n",
      "13:32:46 [DEBUG] train episode 118: reward = -21.00, steps = 1033\n",
      "13:36:50 [DEBUG] train episode 119: reward = -20.00, steps = 959\n",
      "13:41:40 [DEBUG] train episode 120: reward = -19.00, steps = 1133\n",
      "13:45:28 [DEBUG] train episode 121: reward = -20.00, steps = 857\n",
      "13:48:57 [DEBUG] train episode 122: reward = -21.00, steps = 805\n",
      "13:53:25 [DEBUG] train episode 123: reward = -20.00, steps = 1042\n",
      "13:58:36 [DEBUG] train episode 124: reward = -21.00, steps = 1210\n",
      "14:03:15 [DEBUG] train episode 125: reward = -20.00, steps = 1078\n",
      "14:08:46 [DEBUG] train episode 126: reward = -20.00, steps = 1290\n",
      "14:14:59 [DEBUG] train episode 127: reward = -19.00, steps = 1454\n",
      "14:19:40 [DEBUG] train episode 128: reward = -21.00, steps = 1101\n",
      "14:25:15 [DEBUG] train episode 129: reward = -20.00, steps = 1291\n",
      "14:33:24 [DEBUG] train episode 130: reward = -15.00, steps = 1903\n",
      "14:40:21 [DEBUG] train episode 131: reward = -14.00, steps = 1628\n",
      "14:45:46 [DEBUG] train episode 132: reward = -19.00, steps = 1264\n",
      "14:51:24 [DEBUG] train episode 133: reward = -18.00, steps = 1314\n",
      "14:56:42 [DEBUG] train episode 134: reward = -19.00, steps = 1233\n",
      "15:02:00 [DEBUG] train episode 135: reward = -20.00, steps = 1231\n",
      "15:09:38 [DEBUG] train episode 136: reward = -18.00, steps = 1775\n",
      "15:16:36 [DEBUG] train episode 137: reward = -18.00, steps = 1618\n",
      "15:22:21 [DEBUG] train episode 138: reward = -17.00, steps = 1318\n",
      "15:28:06 [DEBUG] train episode 139: reward = -18.00, steps = 1351\n",
      "15:36:12 [DEBUG] train episode 140: reward = -18.00, steps = 1897\n",
      "15:41:54 [DEBUG] train episode 141: reward = -21.00, steps = 1340\n",
      "15:49:30 [DEBUG] train episode 142: reward = -15.00, steps = 1778\n",
      "15:54:32 [DEBUG] train episode 143: reward = -21.00, steps = 1175\n",
      "16:01:08 [DEBUG] train episode 144: reward = -18.00, steps = 1537\n",
      "16:06:48 [DEBUG] train episode 145: reward = -19.00, steps = 1317\n",
      "16:14:13 [DEBUG] train episode 146: reward = -17.00, steps = 1724\n",
      "16:20:01 [DEBUG] train episode 147: reward = -19.00, steps = 1340\n",
      "16:26:59 [DEBUG] train episode 148: reward = -15.00, steps = 1596\n",
      "16:35:04 [DEBUG] train episode 149: reward = -14.00, steps = 1855\n",
      "16:43:51 [DEBUG] train episode 150: reward = -13.00, steps = 2009\n",
      "16:49:06 [DEBUG] train episode 151: reward = -20.00, steps = 1196\n",
      "16:55:58 [DEBUG] train episode 152: reward = -18.00, steps = 1555\n",
      "17:06:21 [DEBUG] train episode 153: reward = -17.00, steps = 1619\n",
      "17:16:03 [DEBUG] train episode 154: reward = -14.00, steps = 2129\n",
      "17:28:38 [DEBUG] train episode 155: reward = -7.00, steps = 2760\n",
      "17:39:39 [DEBUG] train episode 156: reward = -8.00, steps = 2394\n",
      "17:51:37 [DEBUG] train episode 157: reward = -6.00, steps = 2600\n",
      "18:03:12 [DEBUG] train episode 158: reward = -8.00, steps = 2499\n",
      "18:08:18 [DEBUG] train episode 159: reward = -18.00, steps = 1097\n",
      "18:16:56 [DEBUG] train episode 160: reward = -16.00, steps = 1860\n",
      "18:26:40 [DEBUG] train episode 161: reward = -15.00, steps = 2073\n",
      "18:36:32 [DEBUG] train episode 162: reward = -9.00, steps = 2118\n",
      "18:45:22 [DEBUG] train episode 163: reward = -14.00, steps = 1901\n",
      "18:57:19 [DEBUG] train episode 164: reward = -7.00, steps = 2458\n",
      "19:08:58 [DEBUG] train episode 165: reward = -6.00, steps = 2374\n",
      "19:20:28 [DEBUG] train episode 166: reward = -8.00, steps = 2336\n",
      "19:27:05 [DEBUG] train episode 167: reward = -19.00, steps = 1323\n",
      "19:40:36 [DEBUG] train episode 168: reward = -2.00, steps = 2728\n",
      "19:54:10 [DEBUG] train episode 169: reward = -4.00, steps = 2725\n",
      "20:07:06 [DEBUG] train episode 170: reward = -4.00, steps = 2590\n",
      "20:21:43 [DEBUG] train episode 171: reward = -2.00, steps = 2909\n",
      "20:29:59 [DEBUG] train episode 172: reward = -14.00, steps = 1647\n",
      "20:43:15 [DEBUG] train episode 173: reward = -5.00, steps = 2642\n",
      "20:53:09 [DEBUG] train episode 174: reward = -11.00, steps = 1961\n",
      "21:06:29 [DEBUG] train episode 175: reward = -5.00, steps = 2636\n",
      "21:16:24 [DEBUG] train episode 176: reward = -12.00, steps = 1956\n",
      "21:28:36 [DEBUG] train episode 177: reward = -8.00, steps = 2395\n",
      "21:41:56 [DEBUG] train episode 178: reward = -7.00, steps = 2623\n",
      "21:51:14 [DEBUG] train episode 179: reward = -16.00, steps = 1804\n",
      "22:05:11 [DEBUG] train episode 180: reward = -3.00, steps = 2865\n",
      "22:15:25 [DEBUG] train episode 181: reward = -10.00, steps = 2101\n",
      "22:26:28 [DEBUG] train episode 182: reward = -11.00, steps = 2254\n",
      "22:35:37 [DEBUG] train episode 183: reward = -14.00, steps = 1831\n",
      "22:48:30 [DEBUG] train episode 184: reward = -8.00, steps = 2513\n",
      "23:02:31 [DEBUG] train episode 185: reward = -7.00, steps = 2565\n",
      "23:17:12 [DEBUG] train episode 186: reward = -8.00, steps = 2996\n",
      "23:25:48 [DEBUG] train episode 187: reward = -16.00, steps = 1749\n",
      "23:36:40 [DEBUG] train episode 188: reward = -12.00, steps = 2241\n",
      "23:47:09 [DEBUG] train episode 189: reward = -12.00, steps = 2066\n",
      "00:02:27 [DEBUG] train episode 190: reward = -3.00, steps = 3220\n",
      "00:16:31 [DEBUG] train episode 191: reward = -3.00, steps = 2959\n",
      "00:29:26 [DEBUG] train episode 192: reward = -3.00, steps = 2711\n",
      "00:37:48 [DEBUG] train episode 193: reward = -12.00, steps = 1766\n",
      "00:47:19 [DEBUG] train episode 194: reward = -12.00, steps = 2017\n",
      "00:57:44 [DEBUG] train episode 195: reward = -11.00, steps = 2209\n",
      "01:08:05 [DEBUG] train episode 196: reward = -13.00, steps = 2190\n",
      "01:21:10 [DEBUG] train episode 197: reward = -2.00, steps = 2753\n",
      "01:34:42 [DEBUG] train episode 198: reward = -5.00, steps = 2868\n",
      "01:45:16 [DEBUG] train episode 199: reward = -11.00, steps = 2234\n",
      "01:58:27 [DEBUG] train episode 200: reward = -7.00, steps = 2778\n",
      "02:13:35 [DEBUG] train episode 201: reward = -3.00, steps = 3187\n",
      "02:29:25 [DEBUG] train episode 202: reward = 1.00, steps = 3316\n",
      "02:41:58 [DEBUG] train episode 203: reward = -4.00, steps = 2640\n",
      "02:55:32 [DEBUG] train episode 204: reward = -2.00, steps = 2856\n",
      "03:05:44 [DEBUG] train episode 205: reward = -7.00, steps = 2150\n",
      "03:20:04 [DEBUG] train episode 206: reward = -3.00, steps = 3035\n",
      "03:29:48 [DEBUG] train episode 207: reward = -12.00, steps = 2037\n",
      "03:43:40 [DEBUG] train episode 208: reward = 6.00, steps = 2929\n",
      "03:57:17 [DEBUG] train episode 209: reward = 3.00, steps = 2874\n",
      "04:10:48 [DEBUG] train episode 210: reward = 6.00, steps = 2859\n",
      "04:24:09 [DEBUG] train episode 211: reward = 3.00, steps = 2802\n",
      "04:36:43 [DEBUG] train episode 212: reward = 11.00, steps = 2642\n",
      "04:48:37 [DEBUG] train episode 213: reward = 8.00, steps = 2515\n",
      "05:01:34 [DEBUG] train episode 214: reward = -3.00, steps = 2707\n",
      "05:14:26 [DEBUG] train episode 215: reward = -2.00, steps = 2700\n",
      "05:26:14 [DEBUG] train episode 216: reward = 11.00, steps = 2473\n",
      "05:39:36 [DEBUG] train episode 217: reward = -4.00, steps = 2822\n",
      "05:52:36 [DEBUG] train episode 218: reward = 10.00, steps = 2710\n",
      "06:07:37 [DEBUG] train episode 219: reward = -2.00, steps = 3138\n",
      "06:21:51 [DEBUG] train episode 220: reward = 8.00, steps = 2950\n",
      "06:34:26 [DEBUG] train episode 221: reward = 10.00, steps = 2624\n",
      "06:47:50 [DEBUG] train episode 222: reward = -4.00, steps = 2809\n",
      "07:02:28 [DEBUG] train episode 223: reward = -1.00, steps = 3059\n",
      "07:15:04 [DEBUG] train episode 224: reward = 9.00, steps = 2643\n",
      "07:26:37 [DEBUG] train episode 225: reward = 17.00, steps = 2397\n",
      "07:42:03 [DEBUG] train episode 226: reward = 1.00, steps = 3215\n",
      "07:55:24 [DEBUG] train episode 227: reward = -1.00, steps = 2777\n",
      "08:04:59 [DEBUG] train episode 228: reward = 16.00, steps = 1999\n",
      "08:19:30 [DEBUG] train episode 229: reward = 4.00, steps = 2903\n",
      "08:31:44 [DEBUG] train episode 230: reward = 11.00, steps = 2504\n",
      "08:43:04 [DEBUG] train episode 231: reward = 14.00, steps = 2336\n",
      "08:56:32 [DEBUG] train episode 232: reward = 6.00, steps = 2758\n",
      "09:10:14 [DEBUG] train episode 233: reward = -3.00, steps = 2809\n",
      "09:22:52 [DEBUG] train episode 234: reward = 11.00, steps = 2570\n",
      "09:36:38 [DEBUG] train episode 235: reward = 5.00, steps = 2916\n",
      "09:47:02 [DEBUG] train episode 236: reward = 14.00, steps = 2421\n",
      "10:00:14 [DEBUG] train episode 237: reward = -1.00, steps = 3071\n",
      "10:10:57 [DEBUG] train episode 238: reward = 10.00, steps = 2501\n",
      "10:20:26 [DEBUG] train episode 239: reward = 13.00, steps = 2208\n",
      "10:34:08 [DEBUG] train episode 240: reward = 2.00, steps = 2966\n",
      "10:48:15 [DEBUG] train episode 241: reward = 6.00, steps = 2906\n",
      "10:58:47 [DEBUG] train episode 242: reward = 15.00, steps = 2167\n",
      "11:10:59 [DEBUG] train episode 243: reward = 10.00, steps = 2514\n",
      "11:23:40 [DEBUG] train episode 244: reward = 7.00, steps = 2556\n",
      "11:36:24 [DEBUG] train episode 245: reward = 3.00, steps = 2603\n",
      "11:47:40 [DEBUG] train episode 246: reward = 11.00, steps = 2317\n",
      "12:01:45 [DEBUG] train episode 247: reward = 6.00, steps = 2814\n",
      "12:13:31 [DEBUG] train episode 248: reward = 13.00, steps = 2377\n",
      "12:26:06 [DEBUG] train episode 249: reward = 10.00, steps = 2570\n",
      "12:37:33 [DEBUG] train episode 250: reward = 11.00, steps = 2341\n",
      "12:48:24 [DEBUG] train episode 251: reward = 9.00, steps = 2231\n",
      "13:03:09 [DEBUG] train episode 252: reward = 6.00, steps = 3022\n",
      "13:14:27 [DEBUG] train episode 253: reward = 16.00, steps = 2314\n",
      "13:25:18 [DEBUG] train episode 254: reward = 14.00, steps = 2190\n",
      "13:35:46 [DEBUG] train episode 255: reward = 13.00, steps = 2137\n",
      "13:46:51 [DEBUG] train episode 256: reward = 14.00, steps = 2284\n",
      "13:56:18 [DEBUG] train episode 257: reward = 16.00, steps = 1944\n",
      "14:06:26 [DEBUG] train episode 258: reward = 11.00, steps = 2067\n",
      "14:18:27 [DEBUG] train episode 259: reward = 7.00, steps = 2425\n",
      "14:27:05 [DEBUG] train episode 260: reward = 16.00, steps = 2118\n",
      "14:33:36 [DEBUG] train episode 261: reward = 19.00, steps = 1703\n",
      "14:40:48 [DEBUG] train episode 262: reward = 16.00, steps = 1886\n",
      "14:48:27 [DEBUG] train episode 263: reward = 15.00, steps = 2011\n",
      "14:57:13 [DEBUG] train episode 264: reward = 13.00, steps = 2281\n",
      "15:06:12 [DEBUG] train episode 265: reward = 12.00, steps = 2357\n",
      "15:14:12 [DEBUG] train episode 266: reward = 12.00, steps = 2108\n",
      "15:22:09 [DEBUG] train episode 267: reward = 14.00, steps = 2090\n",
      "15:30:04 [DEBUG] train episode 268: reward = 15.00, steps = 2081\n",
      "15:36:42 [DEBUG] train episode 269: reward = 20.00, steps = 1738\n",
      "15:45:25 [DEBUG] train episode 270: reward = 10.00, steps = 2294\n",
      "15:53:22 [DEBUG] train episode 271: reward = 14.00, steps = 2089\n",
      "16:03:27 [DEBUG] train episode 272: reward = -1.00, steps = 2638\n",
      "16:11:53 [DEBUG] train episode 273: reward = 11.00, steps = 2211\n",
      "16:18:55 [DEBUG] train episode 274: reward = 17.00, steps = 1843\n",
      "16:26:12 [DEBUG] train episode 275: reward = 18.00, steps = 1895\n",
      "16:32:20 [DEBUG] train episode 276: reward = 20.00, steps = 1599\n",
      "16:40:22 [DEBUG] train episode 277: reward = 16.00, steps = 2103\n",
      "16:40:22 [INFO] ==== test ====\n",
      "16:41:02 [DEBUG] test episode 0: reward = 14.00, steps = 1946\n",
      "16:41:44 [DEBUG] test episode 1: reward = 14.00, steps = 2076\n",
      "16:42:26 [DEBUG] test episode 2: reward = 14.00, steps = 2072\n",
      "16:43:06 [DEBUG] test episode 3: reward = 14.00, steps = 1952\n",
      "16:43:48 [DEBUG] test episode 4: reward = 14.00, steps = 2076\n",
      "16:44:22 [DEBUG] test episode 5: reward = 20.00, steps = 1688\n",
      "16:45:04 [DEBUG] test episode 6: reward = 14.00, steps = 2074\n",
      "16:45:43 [DEBUG] test episode 7: reward = 14.00, steps = 1949\n",
      "16:46:18 [DEBUG] test episode 8: reward = 20.00, steps = 1689\n",
      "16:47:01 [DEBUG] test episode 9: reward = 14.00, steps = 2076\n",
      "16:47:35 [DEBUG] test episode 10: reward = 20.00, steps = 1692\n",
      "16:48:17 [DEBUG] test episode 11: reward = 14.00, steps = 2072\n",
      "16:48:57 [DEBUG] test episode 12: reward = 14.00, steps = 1948\n",
      "16:49:37 [DEBUG] test episode 13: reward = 14.00, steps = 1953\n",
      "16:50:12 [DEBUG] test episode 14: reward = 20.00, steps = 1693\n",
      "16:50:46 [DEBUG] test episode 15: reward = 20.00, steps = 1673\n",
      "16:51:21 [DEBUG] test episode 16: reward = 20.00, steps = 1694\n",
      "16:52:00 [DEBUG] test episode 17: reward = 14.00, steps = 1948\n",
      "16:52:35 [DEBUG] test episode 18: reward = 20.00, steps = 1690\n",
      "16:53:10 [DEBUG] test episode 19: reward = 20.00, steps = 1691\n",
      "16:53:44 [DEBUG] test episode 20: reward = 20.00, steps = 1669\n",
      "16:54:23 [DEBUG] test episode 21: reward = 14.00, steps = 1950\n",
      "16:54:59 [DEBUG] test episode 22: reward = 20.00, steps = 1691\n",
      "16:55:39 [DEBUG] test episode 23: reward = 14.00, steps = 1947\n",
      "16:56:19 [DEBUG] test episode 24: reward = 14.00, steps = 1953\n",
      "16:57:01 [DEBUG] test episode 25: reward = 14.00, steps = 2077\n",
      "16:57:43 [DEBUG] test episode 26: reward = 14.00, steps = 2072\n",
      "16:58:22 [DEBUG] test episode 27: reward = 14.00, steps = 1947\n",
      "16:59:05 [DEBUG] test episode 28: reward = 14.00, steps = 2077\n",
      "16:59:39 [DEBUG] test episode 29: reward = 20.00, steps = 1693\n",
      "17:00:13 [DEBUG] test episode 30: reward = 20.00, steps = 1692\n",
      "17:00:47 [DEBUG] test episode 31: reward = 20.00, steps = 1693\n",
      "17:01:27 [DEBUG] test episode 32: reward = 14.00, steps = 1952\n",
      "17:02:07 [DEBUG] test episode 33: reward = 14.00, steps = 1948\n",
      "17:02:47 [DEBUG] test episode 34: reward = 14.00, steps = 1953\n",
      "17:03:21 [DEBUG] test episode 35: reward = 20.00, steps = 1687\n",
      "17:03:55 [DEBUG] test episode 36: reward = 20.00, steps = 1688\n",
      "17:04:35 [DEBUG] test episode 37: reward = 14.00, steps = 1951\n",
      "17:05:09 [DEBUG] test episode 38: reward = 20.00, steps = 1687\n",
      "17:05:49 [DEBUG] test episode 39: reward = 14.00, steps = 1948\n",
      "17:06:29 [DEBUG] test episode 40: reward = 14.00, steps = 1949\n",
      "17:07:08 [DEBUG] test episode 41: reward = 14.00, steps = 1950\n",
      "17:07:43 [DEBUG] test episode 42: reward = 20.00, steps = 1692\n",
      "17:08:17 [DEBUG] test episode 43: reward = 20.00, steps = 1671\n",
      "17:08:57 [DEBUG] test episode 44: reward = 14.00, steps = 1949\n",
      "17:09:31 [DEBUG] test episode 45: reward = 20.00, steps = 1673\n",
      "17:10:05 [DEBUG] test episode 46: reward = 20.00, steps = 1670\n",
      "17:10:39 [DEBUG] test episode 47: reward = 20.00, steps = 1669\n",
      "17:11:14 [DEBUG] test episode 48: reward = 20.00, steps = 1690\n",
      "17:11:55 [DEBUG] test episode 49: reward = 14.00, steps = 2076\n",
      "17:12:30 [DEBUG] test episode 50: reward = 20.00, steps = 1688\n",
      "17:13:12 [DEBUG] test episode 51: reward = 14.00, steps = 2075\n",
      "17:13:54 [DEBUG] test episode 52: reward = 14.00, steps = 2072\n",
      "17:14:29 [DEBUG] test episode 53: reward = 20.00, steps = 1687\n",
      "17:15:11 [DEBUG] test episode 54: reward = 14.00, steps = 2072\n",
      "17:15:45 [DEBUG] test episode 55: reward = 20.00, steps = 1669\n",
      "17:16:18 [DEBUG] test episode 56: reward = 20.00, steps = 1673\n",
      "17:16:52 [DEBUG] test episode 57: reward = 20.00, steps = 1668\n",
      "17:17:26 [DEBUG] test episode 58: reward = 20.00, steps = 1687\n",
      "17:18:01 [DEBUG] test episode 59: reward = 20.00, steps = 1672\n",
      "17:18:34 [DEBUG] test episode 60: reward = 20.00, steps = 1667\n",
      "17:19:09 [DEBUG] test episode 61: reward = 20.00, steps = 1669\n",
      "17:19:53 [DEBUG] test episode 62: reward = 14.00, steps = 2076\n",
      "17:20:37 [DEBUG] test episode 63: reward = 14.00, steps = 2073\n",
      "17:21:13 [DEBUG] test episode 64: reward = 20.00, steps = 1667\n",
      "17:21:47 [DEBUG] test episode 65: reward = 20.00, steps = 1672\n",
      "17:22:29 [DEBUG] test episode 66: reward = 14.00, steps = 2074\n",
      "17:23:09 [DEBUG] test episode 67: reward = 14.00, steps = 1947\n",
      "17:23:43 [DEBUG] test episode 68: reward = 20.00, steps = 1692\n",
      "17:24:23 [DEBUG] test episode 69: reward = 14.00, steps = 1953\n",
      "17:25:02 [DEBUG] test episode 70: reward = 14.00, steps = 1949\n",
      "17:25:36 [DEBUG] test episode 71: reward = 20.00, steps = 1694\n",
      "17:26:12 [DEBUG] test episode 72: reward = 20.00, steps = 1688\n",
      "17:26:51 [DEBUG] test episode 73: reward = 14.00, steps = 1950\n",
      "17:27:25 [DEBUG] test episode 74: reward = 20.00, steps = 1673\n",
      "17:28:07 [DEBUG] test episode 75: reward = 14.00, steps = 2073\n",
      "17:28:41 [DEBUG] test episode 76: reward = 20.00, steps = 1694\n",
      "17:29:22 [DEBUG] test episode 77: reward = 14.00, steps = 1951\n",
      "17:29:56 [DEBUG] test episode 78: reward = 20.00, steps = 1671\n",
      "17:30:35 [DEBUG] test episode 79: reward = 14.00, steps = 1948\n",
      "17:31:10 [DEBUG] test episode 80: reward = 20.00, steps = 1690\n",
      "17:31:52 [DEBUG] test episode 81: reward = 14.00, steps = 2078\n",
      "17:32:31 [DEBUG] test episode 82: reward = 14.00, steps = 1951\n",
      "17:33:06 [DEBUG] test episode 83: reward = 20.00, steps = 1689\n",
      "17:33:40 [DEBUG] test episode 84: reward = 20.00, steps = 1689\n",
      "17:34:23 [DEBUG] test episode 85: reward = 14.00, steps = 2074\n",
      "17:34:57 [DEBUG] test episode 86: reward = 20.00, steps = 1687\n",
      "17:35:39 [DEBUG] test episode 87: reward = 14.00, steps = 2074\n",
      "17:36:13 [DEBUG] test episode 88: reward = 20.00, steps = 1694\n",
      "17:36:47 [DEBUG] test episode 89: reward = 20.00, steps = 1691\n",
      "17:37:22 [DEBUG] test episode 90: reward = 20.00, steps = 1694\n",
      "17:38:02 [DEBUG] test episode 91: reward = 14.00, steps = 1947\n",
      "17:38:36 [DEBUG] test episode 92: reward = 20.00, steps = 1673\n",
      "17:39:15 [DEBUG] test episode 93: reward = 14.00, steps = 1951\n",
      "17:39:55 [DEBUG] test episode 94: reward = 14.00, steps = 1950\n",
      "17:40:34 [DEBUG] test episode 95: reward = 14.00, steps = 1952\n",
      "17:41:17 [DEBUG] test episode 96: reward = 14.00, steps = 2077\n",
      "17:41:57 [DEBUG] test episode 97: reward = 14.00, steps = 1953\n",
      "17:42:31 [DEBUG] test episode 98: reward = 20.00, steps = 1673\n",
      "17:43:05 [DEBUG] test episode 99: reward = 20.00, steps = 1692\n",
      "17:43:05 [INFO] average episode reward = 16.94 ± 3.00\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABJNklEQVR4nO29d5gcV53v/T0VOkzuCZZGYUaSLQc5S3LCNsFgbMKuDbvL9S6wJppkFu4u91149r4s++6ysMDacF/SsiQTjYm2CTbG2BbGOIxkyZYsK2uk0Yw0M5ruCZ2r6tw/qs6p0NU9rcnd+n2eR496qqurq6alb337e37ndxjnHARBEER9oiz2CRAEQRDzB4k8QRBEHUMiTxAEUceQyBMEQdQxJPIEQRB1jLbYJ+Cls7OTr1mzZrFPgyAIoqbYunXrKOe8K+y5JSXya9asQV9f32KfBkEQRE3BGOsv9xzFNQRBEHUMiTxBEEQdQyJPEARRx5DIEwRB1DEk8gRBEHXMrEWeMbaaMfYIY2w3Y2wXY+xDzvZ2xthDjLF9zt+J2Z8uQRAEcSrMhZM3APwD5/w8AFcC+ABjbAOAjwJ4mHO+HsDDzs8EQRDEAjJrkeecD3HOtzmPJwHsBrASwE0A7nJ2uwvAzbN9L4IgiFpj//AUnjgwWvb5R14cxtGxzLy9/5xm8oyxNQAuBfAUgGWc8yHAvhEAOKPMa25jjPUxxvpGRkbm8nQIgiAWnS89sh8f+9nzoc9ZFsd7v7cV33j80Ly9/5yJPGOsCcBPAXyYcz5R7es451/jnG/mnG/u6gqdlUsQBFGzTOaKyBXN0OdG03nkDQsT2eK8vf+ciDxjTIct8N/nnP/M2XyCMdbtPN8NYHgu3osgCKKWyBRMFM3wFfiGUjkAwGTemLf3n4vqGgbgGwB2c87v8Dx1H4Bbnce3Arh3tu9FEARRa6QLJoqGFfrc0HgWgO3254u5cPJXA3grgOsYY9udP68F8GkA1zPG9gG43vmZIAiibug/mcZnH3wRldbKzuQNFK1wkR90nPzUPDr5WXeh5Jw/DoCVefqVsz0+QRDEUuU3O4/jS48cwJuv6MWKtnjoPpXimsGUcPJLOK4hCII4XUmmC/bfmULZfTIFA6bFYVqlQj807mTyJPIEQRBLDyHuqUz5TD1dsCtrimZpZDPoZPJTJPIEQRBLj7F00fk73MkbpoWCM+gaJvKiuqZgWmXLLGcLiTxBEMQMSUknHy7yGY9wB3P5omlheDKHtgYdwPwNvpLIEwRBzJAxR9yFow+SybsibwSc/O6hCVgc2Nhj926cr1yeRJ4gCGKGiCy+3MBrpuAKdyEg8lv7kwCAV5xjz/Sfr1yeRJ4gCGIGWBafPq4plI9r+vqTWNEaw1lnNAOYvwlRJPIEQRAzYCJXhKiKHCtTXeMXeb+T39afxKY17WiOac7xyMkTBHEa8pov/AE/2Tqw2KdRgreippyTT3vjGk9rgxMTOQyN57Cxp02KPA28EgRx2mFZHLuHJrB/eGqxT6WEpOPeEw162RJK38CrZzKUyPCXtcTQHLOrayiuIQjitEMMVloVesMsFmK267quprKTobwDr964Jle0H8d1FU1Rx8lTXEMQxOlG3ok4jDK9XxYT4cbXdTZiKm/44hiBL5P3PJ91tsd0FRFNQVRT5q3dMIk8QRBLlrxhi6HpdHHMFAyMz+MCG0GOO71lBOPZonTnwr2v62pyfi6NbNJlSijF7NaYbktwc0ynuIYgiNMP4Y5NJ675l/tewHu/u3VB3ntr/xiu/NTD+MWzx+S2d377GXzivl0AgFS2AFVhWNPRAAA4MZEvOUa2TAmlEPl4RAUANMc0mgxFEMTphxR5Z9BydCqPkalSMZ0PBpJ287CHXjght+05MSk7R+aLFqKaggtWtgIAth9NlhwjXWbGa1aIvG6L/KffeCFuv+6sOb4CGxJ5giCWLCLiECJvWLykPcB80dYQ
AQB5U5nKG5jMGTJnL5gWIpqCVYk4lrVE5QxWL+VmvGaLbiYPAFes68C5y1vm5TpI5AmCWLLknSoUUX5oce4rRZxPREXPqCPyQ84CH2lngLRoWtBVBYwxbOpNYOuRMJE3oSnM2d89b+/A63xDIk8QxJIl6ORNi8+40sa0OLbsHal6f1ENMzppi/ygE9MIF543LERUW0I39iRwdCyL4Qn/QG2mYMguk94SSlE1FCeRJwjidCaYyRsWh1FmvdTpeHz/KP72m09j99BEVfsL5y3aDbhO3pTPRzRbQi/taQMA7Bwc9x1jeDKPzqaos7+/hFJhgK6WWzl17iCRJwhiyeKWUDpxjcXLrpc6HWKyUaVVnLx4byacc+nkRc5eMEzp5Fcl7AqbYynXyWcLJl4YnMBla9qd/f2ZfFxXwRiJPEEQpzFCGI05GHgtmPYNwzsYWs17A8DJdEEuup0tmvJmo2u2SHc2RaEpTLp9AHhuIAXD4rhiXbvvGgC7hFKUT843JPIEQSxZRHZteQZeizMceC0a9uvSheqW2fN+YxhK5TDkrMfKOZAzTBQ8mbyqMCxricnySsBuJQwAV6ztcN7f7+QXYtAVIJEnCGIJU+LkTdfJJ9MFfOuPh8ADfW0e2DkUmrvnnddlq3Ty3gx9cDwr12MF7Fy+4FTXCFa0xaTbB+xWwmd2NaKzKQLGgGOpLL7/VD8A28mTyBMEcdqTDwy8WpzD4raz/+Vzg/iX+1+QWbng4/fuwl1PHC45lnDS3glKlfCK/M5j4ziazGB5SwyAnbcXDEsOvALAirY4BsddkX/x+CQuWNkKxhh0VcHdzxzFP/18J4Ync8gWzAWprAFI5AmCWMIEq2u8VTYnnS6QuaJftPOGFTo4K8oxq87knf1XJeL4wVNHUDQ5rlnfCcDuSVM03bgGALpb4zg+noNlcZgWx/GJHFa2xQHAt5/CGHJFi0SeIAgirE4esCtfRJVMsPtjwbBkQzMvwslnqszkRT3+Ves65A3lWkfkM0LkNX9cUzQ5RtN5jEzmYVoc3Y7Ie0slLYsjWzQR1RdGfknkCYJYsrgzXv2Nyoomlwt1BEW+aFqhs2JdJ199XKMw4LK1dnXMmV2N0plnnLhGDzh5wB6kFbHNilY73tE8+xkWt6tryMkTBHG6I8oeRfoi3LVhWrKfe94j8pZltz0QLQke2zuCL/5+n3MskcmXxjX3bj+G7z7ZH3hvW8Q39yYAAJt6E7LsMZ03fZOhAKDbEfTBlDtIuyIkrjGFyC9QCaW2IO9CEAQxA9xM3r9ClGHx0LhGCLm4Gdz6zacBALdft17ulymWOvmfbB3A8fEc3nplr9xWNDgiqoK1nY1420vW4OZLV6IxYktmpmAgH3DyazsboTBg99AEWuJ2K4MVraVxjenENTGNRJ4giNMcV+Ttn0UMUzQtN64xXdEOZvheRLVMJsTJ54qmXLPVu7+mMjDG8Ik/Px8AMDwpZr2azsCrK96NUQ3nLm/B1iNJnLOsBQ0RFS1xW2L1QFyTLdBkKIIgCE8JpX9SlGFyuRKT18kXA3X1XsR+YZOh8oaFVKbgq7kvBurgAaDB4+SDJZQAsHlNAtuPpHA0mUF3a0y2LdBL4hqL6uQJgiCCk6HEwGumYEqxzofENeFO3t6WDRP5oj1Y611ntWjyEpEXg6V2Jl96E9jUm0C6YOLRPcMyjwcA3XMzKBgWCiaVUBIEQchZqsLBm6a/xzvgF3nRuiDYqZJz7nHyIXGN0wgtlXYjm2CJJGC3L4jrKtJ5A4bFS57f2JNwXstlHg8AuuLGOlPOjSS2QCWUlMkTBLFkCS4aIpz88KQr8mEDr6bFfdtNi7sllCEzXsX7jGUK6HHWbLWdemmXyMaoipSzmHjQya9ub8DHX78BA8ks3nTZKrndu58QeaquIQjitCds+T8AGJkMd/LeeEdk9uLnYoUZr6KlcdLzmrA4BrDFWVT2RLXS599xzdqSbd64Ji2dPMU1BEGc5hRC+skD
bpWLvY8nrvHcFLzVMgXTcksoC2ZJU7Nc0W165r6G+yYxCRojGsaz9n5hN4EwvFU4kyTyBEHUI8E+8JbFpWiXI2xlKMDv5KV7Ny1fnbzXlRum6+QNT3RjOucgnPxYuiCPVzT8JZKChogqbyDBTL4cvrjGWbyEBl4Jgqgb9p6YxHkffwAHR6bktlu+9iTueGhvxdflPfGL94YgRJ4xu04+UzBw6b8+hN88fxyAPWnK68oNj5MH3Aqbm7/0R3z+d3shDn3nQ3vx0s884sycDY9rGiKajGuqdfLebwTpBR54JZEnCGLeGUhmUDQ5Doyk5bajyQwGkpmKrxPCbFlcDroCwGTOkJUu+aKFiayByZyBfcOTAOybQklc4+lMKcov+0+msfv4pG/78YkcRtN5FEJKKAGgKarJvL96J+9+IxBdM6u9QcyWOXkXxtg3GWPDjLGdnm3tjLGHGGP7nL8Tc/FeBEHUHqJ6JRkYDA2btOSl4IlYvLXv6YKBmKYgoikomJYsmZxwql7sTN4f1xQME2JJVTHrNW9YPscvGErlUDTCnXxzTJPnHRbnhOHtXSPKNRdiEW9g7pz8twHcGNj2UQAPc87XA3jY+ZkgiNMQIda+ihfTCp205EXcHMygyOcNRHUVUU1BwbBkr5oJJ+82LL94G5bdY74lZveUEYOvBdPCWCZE5MezTp18qRA3xdyixJlk8mKQV1NqyMlzzrcAGAtsvgnAXc7juwDcPBfvRRBE7SGy9THPZCPh5FOZAk5M2NUypsWx74Qbn8gBUu53/VN5A1HHyecNt7WwdPIm94l3wbDr5tsabJFPF+zJTJwj1MkPpnJ275oQIW52bhRA9ZGLX+RtJ6/VmJMPYxnnfAgAnL/PCNuJMXYbY6yPMdY3MjIyj6dDEMRiIbL1VCBCMS2OT/5qN973va0AgP944EVcf+cWHB3L+F4XdPKi90tEVZxFQuznxh2RNywuBd/+2ULRtNDWEAFgV7jIc8r6G5PFdMVx8uGZfIvXyVcp8staor5zB2osk58NnPOvcc43c843d3V1LfbpEAQxD7hO3hV5IdzJTFFWqzyw87izv+12w2rgBbaTVx0nH5gZa3F/uwPTdvJnNNtim8oW5fNiPPerb9mEHR9/Nbpb49LJh8Y1UVfk9SrjmrdfvRb3336N79pUpfad/AnGWDcAOH8Pz+N7EQSxhHGdvKc3jGU5Qm+h6Ii0O8mJwXLq2YVbDq4AFdUUO5MPyfYNi8ucHnBr6IWjTmUKJcdrjetobdDR3RrDoJPJhw+8unFNtU4+oinoaLK/RYhxBr2WMvky3AfgVufxrQDuncf3IghiCSMEVVS8WE4ebliWT5BznoFWkceLHi/5oMjrqp3JF82SKh1T1rnbbrlo2sdri0egqwxj6WLpTcOpW+9ujdvVNeVKKGcw8Aq4zl1U19RUJs8Y+yGAPwE4hzE2wBh7J4BPA7ieMbYPwPXOzwRBnIaIhT2EyAvnbjoCXzSDTtydvSpmhlZy8oZZKvIFk8vWAbmiCc5tUU40RJDKFGRs4j0eYC/IPTyZQ7Zohgpx8wwyecAj8gs88DonDco4539d5qlXzsXxCYKobdw6+SI45742BQy26/b2kzFMt4tkg+PkCyWZvCpbCAdbC9s3D7tn+2TOkO2FhciPpQsl3wzEDWF5a8yeAct5qIg3zyCTBwDNEXnxvgsV11AXSoIg5h1vrxhRyy5+ZrBgmtyX13u7RsbKOXldAef2AGtpJm+7e3GDyBTcWaaJRh2pTDHkpmGLbrtTgSP2DzKTTB5YPCe/6NU1BEHUP16BTmUKrpN3yiiLloXB8azcx7S4XABE9HgJinxMc0sog5m8xe0bS1wu12cLq3TyIQOvUWdh7bZpRL5phnGNqLk/7UooCYKof7zRSDJTlB0pTWdClGFyDKXc9sGGp+LGHXgNZOi609bAsOSKUV5yRRNx5wYh2hhEVIZEo8jkg3GN4+QbvSIfsmhIRIWofpzRwGux
fkooCYIgAPhdeDJdkM7bcMooDYtjKOjkRVzjOOywOvmopjq9a8JFXiy8nfY5eR3JTBH5YnDg1X6fRIMnjgkRccaYrJU/lf4zwUxeI5EnCKJeyBtuvftU3pDVMBZ3xXtkKrCSk4hrypRQxnwllP7nADsWictM3nbyumrHNabFfROzGHMF2xvXlOsv0xzToTCELipSDkVhskGapjAwRiJPEESdUDAtNEZtwbVr4y35WOTzWc+yfIbJS0ooS+rkPV0owxqd5QxTvlZm8o7IA8DxiZzvWEJ0ve69nFNvjmkzytSFe1+oQVeARJ4giAUg74lOiobbbMw03cdCiAHYs2BldU25Ga/2wGvR5CXPAXa7goagk9fs6hoAsima/R7hqzSVy9ybotopDboKRA6/UOWTAJVQEgSxABRMS+bYRcudvGRYHEJes56M3AjJ5Euqa3QFHLzktV5EXJPO289HVUVuOz7ud/Jeok53y3JuvTmmndKgq0Bl5OQJgqhDCoYnrjHdyVBi0BVwq07kdlOUUJafDCXctPdbgBc3rvE4eRnXuOvEikFXgTuwWsbJx/QZxTWqjGvIyRMEUUcUDEuWJhZNtzzSsDgUXhrX+DL5SPm2BqZVWeSjmgrG/Jl8a9yOa0anXJEPrrfaFNNwMl0om8m/5YoeXHNWx3SXXYIQ94WqrAFI5AmCWADyhoVGkckHnLwlluQr+J38dDNeY7oqB3BFHXwQTWXQVcU341VMZhLVNTFdKXHy4lzLufUr1nXginWnLvIqDbwSBFGP2HGNLZyG6Q6q+qtr3GqYYtjAa5k6ecCtgw+iqwy6wpDOu71rdFVBTFdgWhwRTUFjRCvJ5MWNYK6lWFuEgVcSeYIg5h27xYAtN0VPyaM3k88UDBnNeNsaBLtQCqEUM14Bf/mlNwnRVQWaqiDriWsAt/9MVFXQEFVLqmtEE7KpMt8QZgo5eYIg6oJ/uGcHXv7ZR/DgruO44c4tyBQMRFSn5NEzqOpd1i9XtKSgezP5WKBOXrhusfwf4I96RKkmYGfguqpgSg682uIqRDyiKWiJ6bLUUnDmGU3O/nMrkbJOnkooCYKoZX66bQAAsP1oCnuchbmjugJNZTA8bQhES1/AdvIdTfbKTaJVMFBaJx/TVaQLppwMBfhLKGO6AjGmqisMusrkEn8iMhI94SOagn+9+QKZwQv+4dVn49zlzXj52XO7JKmsk19AJ08iTxDEvJH0tA6IqAo0haFougLuJVMwsUo4ec8M1mAJpXDyUU1FRLPFPVswS/YHbCfvjUaEmIvMPaop2NiTKDmXqKbijRtXncqlVoVw8FRCSRBEXXAs5TYdizjOu1imoVjesKRrNy0LFrfFWbY1cNy6EPGYrsjqF39c44q8PfBq79MU1aSTbo7q8pwWEvH+C9WBEiCRJwhiHjk4kpaPo5oCTVFgmDy0oRjgCnjR5GCMB7bZr4l4nbzqxjUKs+OfuO4VefdG4F22zxvXLCQU1xAEUVd4nXxUs6OTYsiarN59FGZn8hwcmpOpA564xhFxbyafKRhojGjgADqdXB+wBzpFXNPkWbbPjWvCe9bMF+oiDLxSdQ1BEHNOcAYp4MQ1oromJK4B3JJHu3cNh64qUB1BFOvEeqtrxA0gUzChawruu/1qvP3qte7xNEXm334n78Q1C5iNA546eSqhJAiilmnxrIMqiGpqSXVNEN0ZnBVdKDWVyaZewYHXiMfJ54omVIVhXVeTT8x1RUFEOHnPOXlLKBcScvIEQdQFLfFSkY84mbwd14Rn8rZzZ7ILZURVoIq4xlNCqasMqsKkEy+aXLpk76CmpjIpqEshkxfREU2GIgiipvHm34KIqkBXma93TRBdZY6Tt2e86qriOnnPZCjRftgr0mGzSXVPCWVLLCyTX2gnr8jzWihI5AmCmHO8Ei5WuYs6JY+GZaFYZuBVZPBF03byusakeIsZr70dDVjV3iD3F7izSb0i77p9741HZvIL7eQXoYSSRJ4g
iDnH9JRIrmyLA3AmQ0knXz6uEZl8wbTkz4CbyX/wuvW49wNX28cMcfKq4hV+Rd5kmr2Z/KI5eRp4JQiiDvBG7uu67D4wogNk0azg5DW75NGbySuKP67RVXfA1e/a7W2qZ4HsiMaQc6pyfJm8GHhdpOoaGnglCKKmsTyZ+7rORgB2dY2uKr6VoYJEpJO3m5hpTkYPuCLvTToYY1LwpZP3uGRNUWRfm7C4Jlpmbdf5grpQEgRRF5hORzCF2Rk6IKprmG9lqCDe6hoR16ieuEZVGBjzC6Rw42GZvKYyuaygN64RA68L7eTduIacPEEQNYxw6tedewY297bjvO4WLG+NybjGLBPXiJJHUwy8ekTetLgvihGUOPlAhOOKvOvkGyMqrlzXjotWtc7B1VaPGnIjmm+orQFBEHOOaXHcfMkKfP6WSwEAv/nQtQDsAUejwozXiHTydm4f11WfsIdVpYhBTNnhsUTkSzN5xhjuvu2q2VzijJDfNsjJEwRRy5gWlwOmXjS1coMyUdcuBl51lUFRmMzhw0S+kpMvF9csFmrIjWi+IZEnCGLOsTgPFTJdZShUaFCmewZeC4blVsw4xwrTRrGPFnD0gN3WIBcy8LpYaDTwShBEPWBYPNR1262GK/WuYZ52xFwuv6dWiDmCA6/+TJ7JSVTeuGaxkAOvtPwfQRC1jGVxKCGDpKKEslzvmohmZ/KmiGuEcDPh5CvFNaVRiKow/Og9V+H+HYMLPvEpjMVw8iTyBEHMOSYPd/Iyrinj5DXFzuTzholiSFwTFgEFnbyiMDBm/8wYw6beBDb1li7xtxioKg28EgRRB5jl4hpRXVM2k2eeOnk3rtECYu9/jfOcbxIUW9Ba9GqR/eRp4JUgiFqmXE27rioyiglrDqY7E6YM021rALgxTaXqGq/LVxhb0AqWalEXYSFvEnmCIGbEiYkcbv7SHzE8kSt5rpyTF+46Z5i+jFwIst3WwL0RuDXw5UVeVtco/uMtRScvbnxUQkkQxJJn1+A4th9NYffxyZLnLF6mTt7Zli2YcoFuwO0GqTuLhBiWXWYphDrRGAEQXkIZ0UqFU/Ws7bqUoEVDCIKoGSZzBgAgWzBKnqsU1wBAtmj5nLwQfLFoiOhdI2KNFa0xAOHdGyNhmbyqLE0nX49dKBljNzLG9jDG9jPGPjrf70cQxMIgRD6dN33bOeeweOUWBLmi38m7Im+XUIpFu8X6rN1ttsiHfTvQ1dJMXl2icU3dLeTNGFMBfAnAawBsAPDXjLEN8/meBEEsDELkMwEnL5qThVfXuAtve518VHfjGk1hsj2wEOruVnvhkWJIfX2wrQFgi+nSHHitvxLKywHs55wf5JwXANwN4KZ5fk+CIBaAqXwRAJAp+J28aDNcaZA0G3TymhvXqIqCbMEv8iscJx82yFuTTr6OBl5XAjjq+XnA2SZhjN3GGOtjjPWNjIzM8+kQBDFXyLgmIPKi91j4jFd34NWfybtOXkyYAiDr5IWTn8iV5v/RQC09IKprlqKTr78SyrDfsm8WBOf8a5zzzZzzzV1dXfN8OgRBzBVTIq7JB+Iax8mHxSViwDFvWKGZvGhrIBCZ/ApH5MMo5+QXUkirpR4X8h4AsNrz8yoAg/P8ngRBLAATZZy8WBAkvNWwu82XyWv+TF4gBHxZa7TseYRn8soSdfJ1NvAK4BkA6xljaxljEQC3ALhvnt+TIIgFYDJnZ/LBEkqZyYfVtKulwq55XLfmZPICsT2qlV+LtaYy+ZB2yPP+nvN5cM65wRi7HcCDAFQA3+Sc75rP9yQIYmGYypdx8hWra/wunTEhyN4Zr964xi+GzSE94YNdKAGgoymCzqby7n+xaG+MQGFAonHhFjCZ9y6UnPNfA/j1fL8PQRALS7kSSktW15S6VS3g0kWpo9juXdMVANoaXDHc8fFXI8wARwKtDwDgi3+9MXTfxeaaszrxh3+8Tg4kLwTUapggiBkhnHywhNKQTr70NaIFAeB2nBRi
rwRcPQAkGiLycWtDuPuVDco8ryu372LDGMPKtoUTeIDaGhAEMQM45zKTz+SDJZTOwGtICaXXyauOg9ecPjNu73h3n2pijbBMnnAhkSeIGuBYKotnjyQB2LM+H9x1HJyH92SfD/admMS+E24jsrxhoehU0aQLBrbsHcF41hb9U8nkVcV188HFPwCgLR4pOUaQsEyecKHfCkHUADfcuQVv+PITAID//O1evOe7W/GngycX7P2vv3MLrr9zi/xZ5PEKA4Yn8rj1W0/jp1sHAFSe8eodSG2MaDKTP7OrCeuXNZW8LqznfBBy8pUhkSeIGkDk37miie1HbUe/gEa+BBHVdDZFUTAtcI4qnbwrOZf0tNlOXmV45zVr8bP3X+3sY7+uMVK+bNJLWJ084UIiTxA1xPHxHIYn8wDcVgCLgbjpLGuJyW2iykaKfGgm7267ZHWbzOW9CLFujlU3eBpRSwdeCRcSeYKoATqb7Gx6cDyLkQlb5Itl1kmdT8Sgqohr/CJvD8AKka/UFhgAWuO6zOR9+zii3xyrrvjPXf6P5CwMKqEkiBrgjOYYRqcKGEzlMJn3O+ZyHB/P4ZfPDeKd16wFC3HVXo6czOArjx2AYVpojGr4yA3noClk4tFEroi2hoiMa5a1uBOOMgUTv3puCKNT9k0oLCMPCnpYS2DXyVcnT25VDjn5MEjkCaIG6GqOAkPAUCort4X1Vvfym51D+Ldf7cbNl66cdvbnb184jh8+fQQdjRGcTBfwyvPOwLXrSxsGjqULjsiXOvl03sC//3q3nIQU5uQTDTquXNeO9738LADhy/SJn1vi1cU1azsbcfnadpy/oqWq/U836PsNQdQAYoLQoZNpuc2YJq4RN4G8Uflm4N3nq2/dBAAolHlNMmM7eCHyyz0iny2amMgVkS3Yrw3N5FUFd992FV52tn0D0RSlpPRRzJitNpNvjeu45z1XYXV7Q1X7n26QkyeIGkDMIt1+JOXZVlm8xWtyRbPifoAr6g1ORUu5bwnJdAGAO/Da5YlrJnMGpvKGrPqpJj5RQ+Ia0cK42riGqAw5eYKoAUT+fnDUdfLTDbwKpy/WS61EwbSgq0x2hizn/pMZW+Qnc0XEdRUtHrc9OpUH5+5NJWzGaxBNLR14nSCRn1NI5AmiBrBCiuKrdfJ5Y3onXzQsRFQFEVU4ef/7Cb0WIj+VN9Ac09AYdWvZRWmneN9qShrDnLy4wVQz25WYHrpVEkQNYJgcEU3BS9d3Im9Y+MO+0SqcvC2WuWqdvKZAdxqIBTP5qKYgV7RkJj+RM9AU03BmVxPe9pI1GEhm8bvdJ3yvqcbJv+0la0raCb/r2rVIpgu49SW9076emB5y8gRRA1icY3NvAl+/9TJ89i8vBjD9wKt5Kk7eFE5ekT97Ee8lMvnJnIHmmA5dVfCJPz8faztLBz2ryeRvumQlXnNht29bS0zHv958ARoi5EHnAhJ5gqgBDItL0RQxiDlNXCOcfrXVNbqqyIWzvU6ecy4jGBnX5Iq+BTzCBDmsuoZYeEjkCaIGsDwiL2aEThfXiJtANdU1RZMjqrlOvuBx8t73SabdEkrvwGhDSJ8Zmpy0NCCRJ4gawLC4dMbCyU/mDNz4+S3YdiSJd93Vhx8+fcT3mqJVvZMvOk4+LK7xDvAGB14FDSGzY0nklwYk8gRRA5gWlzNIhcgPprJ48fgkdg1O4E8HRrHjaMr/mlOIa+yBVwbFqXbxxjVeJy/600zmDDRF3fLJsI6RS3Ad7dMS+hgIogYwLS5LDUVck3a6PuYKJrJFs0TMi44Dz1cV11jSxeuq4nPy3sfZognT4qVOPkTkq6muIeYfEnmCqAFM7jp5RWFgDMg6rnoiV4TFS6tozFOIa8TAK2B3dfQ6eVFZ0xzVkC2Y8ubiF/nSuIa6Qi4N6FMgiEWgYFjTDohmCoasdfc6ecB280JsRU4erId3Z7xW6eQ118kXPBGNcPJNMQ05w5R9a7wi750UJSCNXxrQx0AQ
i8A//fx5vPs7fWWf55zjdf/ncdz5u70AbJH3liRqKpNOXlS8BJ28GDDNVZPJG25cEw04eSHyLTEdnAMnnVbC3kw+rtPA61KFZhsQxCKwa3ACw5O5ss8fHcvi0Ggau4fsxbO9JZSA3YddDIIKJx/sUTNzJ88C1TWiK6QtFyNO+4LpnDzVyS8NyMkTxCIwNJ7F6FSh7GzUvv4xAHYFDeCfDAXYkYoQ+TFnFmquxMmfQnVNhUxeCL4Q9eEQkQ+dDEVOfklAIk8QC0y2YMoeMMfHw9381n57se4h53mLB5y8yuSaqinnWCVO/hQnQ3kzeZ+TFwOvTsfJqp08ifySgESeIBaQgyNT2H18Qv48mKos8uPZoj0AWxLXeJy8iGsc9z2WLuDwaNqNawJOfv/wJMadG4OgpLompITSdfI552c3k49ptsh7zzFsZShi4aFMniAWkOv+8zHfz4Oe5fwEpsWx58QkViXiGEhmMZjK2ZOhAgOvQrxFtCKinyv//WEUTAubehPOdr9gv+HLT+DPL16BT77hQt/2qLe6JmQyVNDJe9eAVRSGVYk4GiMa9pywxxEok18akJMniEVkaLxU5POGCc6Bc5Y1y32CJZRhi2TnihY459KFh60M9eLQJCZzBp4+NOZ7rZ3J28eMBpy8iH28A6+qwkomQD344ZfivS9fJ3+muGZpQCJPEItEc1TDYEgmL7L1NZ2NAGy3b4YMvJa8zjBxzPPNwAhZ41UM6O4bnkLKiXmA0jr5sBmvLZ6B16aoBhZw6o1RDXHdFX4S+aUBiTxBLBDcs7pTZ1MUPR0NGAqJa0SVTG9HAxiDjGuCA69B8oYls3zAnfHqdfJb+5NyladnnfViLctuJSwz+SrimqaQhmQA5I0CoLhmqUAiTxALhNdRr2iLobs1HjrwKpx8c0xDZ1PUjmuC1TUh00k5B548eBKAvVxfWAnltv4krjvnDKgKkzcEEc1IJ68pvqZkbnWNJo9Xbv1V7zcMGnhdGpDIE8QC4RXbq8/qRFdzFCfThbL7RTUVnU1RjE4VwDkCcU24gO48ZlfucO7GLGJANlMwMDiew6Y1CfR2NGD/8BQAj8iXdfLipuNW05QTeXEMimqWDiTyBLFAiJmn/3bzBfjHG89FJDCzVCDilaimIKIpUqR9bQ3KNIYZdVoOAG5bYNHTRrRBaIpqaIioUtyLht/JRzRWsYTSfuwKvhexshSJ/NKBRJ4gFgjXoYcPcAb3i+kqdIVJkVamyeQBv8gLURc3l6zzd0xTfW5dRDPeTD6srYE3h5/WyVMev2QgkSeIBUI48phTgWJn35WdvF0Pb/88XQklEFzgw3De1z/zNRZRfd8QhNjranidvKjSiUdUGRNNN/BKTn7pQCJPEAuEcOR+J89l1U2uaOJf7t8lJxtFNRW6qsjX+atryv/XbY3bUYpjwJE37Pp5cZy4riKqqVLIgwOvkcDNR7Qd1hVF3qDKxTXCyZPGLx1I5AligRDOOeoIZcRxxSIO2TU4gW/98TAe2TMMAIjpiiPyTiZfxcArAHQ0RkLe23LjGl1k/f4Zs+J8xM3Hcs5LOHlNZbIOvmx1jXOjqHQTIhYW+iQIYoEQpZExj5MH3IFNIeai4VhUU6F5MvnpSigF7R6RF7FO3rBkRh/XVV+nyWKIkwfc5QPFTUhXvU6+ciZPS/8tHWYl8oyxv2KM7WKMWYyxzYHnPsYY288Y28MYu2F2p0kQtU8u4OSF2y0atohmA62Do46Tz4c4+eDAq3cSklfkG53sPF803UxeVxFVPU7e9GfyQqgLAaeve5x82UxellBW+k0QC8lsP4qdAN4IYIt3I2NsA4BbAJwP4EYAX2aMlfYiJYjTiHwgkxfxiBBZcRMQi4DENHugU4ixt2JFDzj5tribkXc0eUTe6S/jjWviERVRXSktoVQDTt7J4g3LgqowMMYQi0yTyYuBV3LyS4ZZiTznfDfnfE/IUzcBuJtznuecHwKwH8Dls3kvgqh1vKWR
QGlcE+bkNdUVY6+TV50bhNDStgZXdMOcfDbg5L0llHnh5AMxknjeMN3maHHdfq78jFfmOz9i8ZmvL1UrARz1/DzgbCuBMXYbY6yPMdY3MjIyT6dDEIuPtzQScMVUtA0Qz+c9zto7wOobeHUeNzsi3hZ3hb29MSofr+uym5ztOzHlq67xllCWd/JunCOei00T12iqAoWRk19KTCvyjLHfMcZ2hvy5qdLLQrbxkG3gnH+Nc76Zc765q6ur2vMmiJqjZDKU87dw6llPI7GIpkBRmK8XTFgJZYsT07R6nLy3uuaiVW2I6Qr6+sfcuGaaEkpxY8l7nbzK5GsBe1HvcuiqQn1rlhDTLhrCOX/VDI47AGC15+dVAAZncByCqBuCk6FEJu9W17i16eJG4K2iCRt4tcU2i0SZuCamq7hoVRu29Sfx8nPOkMeOaAosbpdHFgO9a6IBJ29YlrypyIHXMnENYN8syMkvHeYrrrkPwC2MsShjbC2A9QCenqf3IoiaIDjwKgS8GOLko5rI7T1xTcjAa0vcFtvWeLjI6yrD5t4Edg1OIJUpIOp8QxCuPW9YbvVMmUy+aHL3BjBNCSVg3yxoxuvSYVbL/zHG3gDg/wfQBeBXjLHtnPMbOOe7GGP3AHgBgAHgA5zz6VcTJog6JmeYUBUmXbGuhQ+8AvaEJcBfKhnm5NviEagKQ6IxAoXZMY43L1cVho09CRgWxzOHk4g71THiRlMwLDmjNZjJF0wLN35+C148PonejgYA9sIhTVEtdNESQUQjkV9KzErkOec/B/DzMs99EsAnZ3N8gqgn8kVLToQCXJdeMEoX9wgOzgLBGa/29oaoim+//TKcv6IVX/z9fjREVHnzAGzHv7rdFugjYxl5A/AKeSEw8CqOvef4JF48bq/XKqpr3nHNWlx37hkVr1MnJ7+koIW8CWKByBuWjDsAV1TFGqq50LgmXOTF46im4Nr1XfJxc0yXxxX7ibx+Km+gqznqe++CYZXMeBXvKRYg8W5b1hLDspZYxeuMaArNeF1C0Lw0glggckUz4OTLZ/IyrgkRdu92700gqqloimo+kddUhrYG/0AsAE8mb8oSSu9C3gDw5EF3se9K8UwQXVXKdskkFh4SeYJYIIJO3h3gdNoa+KprKjt5XS2Nc2K6guaY5mtxoCl2JY2IacRkJnH88WwRR8YyMs/3HtPbm/5USiJF+SexNKC4hiAWiLxhSpcMuM5ZllB6Bl6jur9mHQisDOVs9wp6Z1MUy1tjoROoEo06pvKGdPLiPP75vl3YeWzCV4LprdTpaW/AkbEMUpnSZQrL0dEYqdglk1hYSOQJYoHIFcOdfDHQuwaw+9YA/pa9/hmvpU7+K2/ZhIiqyFmnFndvEomGCI6OZWWdu7g59J/MoLejAd9622XyOMtbY7j/9mswkStiMlfEe7+3Tfa4r4bP/dXFobMhicWBRJ4gFogSJ1+hhNJ18pUnQ3mPJwZVxevyhuU6eSeXjwVKKCdzBs7rbsG6ribfuV64qhUAsHtoAoC7Xmw1tIf0sycWD8rkCWKByBtWmbhGZPJhJZTl6uRLn/cSrJQRcYz4huCNeVoqTGxa0RaveE3E0odEniDmgVzRxOP7RgPbLJmJA24Zo7etgaiqEfuVbWvgPI6UqXpx+7qLTN521/GIf8ITUL7ZGFD5BkDUBiTyBDEPfP+pI3jLN57C8GRObisdeC1dGarD6SA5rZMXJZRa+H9hPeD0RVwTlwOv7s2mXG94AGDOYO/LzqbmgbUK3aYJYh545pBdYz6VM3BGs70tX7R84qp54hrOObJFE2s7G3EslS3pOQ/4a+aDqzgFkYt3ON8EhJMP1skDlZuNAcDBf38tlUTWMOTkCWKO4Zxj65EkALddr/3YlHEM4FbI2LNOOUyLy1WdZBMzj5NXpimh9CIcvCYHXp1MXnbAdF9XqdkYcGo18sTSg0SeIOaYgWRWlhz6RD7g5BWFQVMYiqYlyydFZYrYz5vJhz0u
NxM1IkswbYFuD8Q13ptDpbiGqH1I5InTnmzBxFcePSAbdc2Wrf1J+djbj8ae8er/L6erCgyLy4lQnU12Ji8cf0TzOHnF+7rpBl6Fk7efF60Ngl0oAXd1KaI+IZEnTns+++Ae/McDL+KBXcfn5Hh9/W7PF+Hk84aJgmnJhbUFuspQ8CyyvaajEZetSeCiVW0AylfXrOtqwkWrWnFud3PoOQinLuKaNZ0N2NjThouc+vdTiWuI2oY+XeK05+nDdrfFuK5Os2d1bO1PoaMxgpPpAvKOeKcyRQDwNQsDbDEumq7ItzXo+PF7XyKfL9dPvr0xgvtuv6bsOYgYR7y+IaLhZ++/Wj5vLy3IUDR5xRJKovYhJ0+c9uwesnume6OVmTKZK2LP8QlcdWaHfUzHySed3i/B2aCaYou8d5FtL762wafQvtd18uX/i4vcnzL5+oZEnjitGc8WYVr2jNNMwZj18bYfTcHiwNVndQKAdPLJtHDyfkHVNdtNi5YGsYDIa2r4wOt0BJ18GOJGQHFNfUMiT5zWbD+ako/T+eqcfK5o4u/v2Y7hiVzJc1v7k2AMuGJtOwA3ky/n5HVVQcG05LeIWGBg1lsbfwoaX5LJh+6jksifDpDIE6c1Q6msfJytMq7Ze2ISP9t2DE8dGit5biCZxfKWGDqcKhkh3kLkE8FMXlVgmBYOjEwBKO0VE+wNXy0R6eQrxDXODYUy+fqGRJ44rUl7uium89XFNSJaCbspTOUMNMc06cilk0/bIl8S16gKiibHtiNJrErES5bWm7GTV6tz8nFdrXgjIGofuoUTpzVZJ4eP62rV7XTFYGo+ROQn80XfEnxuXFNEY0T1TYYC3BLKncfG5WCtF22GA6+65p/xGkZEUyiqOQ2gWzhxWpMumNBVhta4XvXA6/ROXgdjDFFN8Qy8FkrKJwFbxA+NpjE8mcem3kTJ8+UalE1HRFWnfU1EU6btW0PUPiTyxGlNJm+gIaKhIar6optKiJw9WyidITvpxDWAXSnjHXgNW0wjoio45owLbOwJE3n7v6jC3I6Q1RDVFURUpeJr4rqKFiqfrHvoNk6c1mQKJhojKhoiKjJVZvJC5L3L9QkmPCIf1RS571imWJLHA36nftYZTSXPi7jlVAZdAeCvL+vBhu6Wivv8/fVny/JRon4hkSdOazIFE/GIioaIVnUmn5VOPiSuyRfl5KKorkgnn8oU0NveULK/cOqdTZGSGnnAdu+awk5p0BUAejoa0NNR+n5eNq9pP7WDEjUJxTXEaU26YKAxqqExUv3AqxD54AxZMXNVlCTGNBV5w83kw+IasehHd2v5ZfZ0VTllJ08QAvqXQ8wJnHO5wlEtkcmbaJBOPjyusSzu61ApWhB4Rd4wLYxn7VmtMq7RFeSKFgzTwkTOCI1rRBVOd2us5DmBpjJQS3dippDIE3PC9546gpd95hFwXlsZb6boDLxWcPKff3gfbvj8FnltcuDVI/Lv/d42vP/72wC4vWCijpMfnbJr5DtCnLzI3CstmK2rCtWyEzOG/uUQc8KB4SkMjucwVeXg5VJBOPnGqFZ2MtSje4ZxaDSN/pMZAN4SStfd7zw2LvvIy7hGV5AvWtgxkAIAbFhROhAqau4rOXldZb5VoQjiVCCRJ+aEyZwtkKKlbq2QLhhojGiIR9TQuvdMwcCuwQkA7mIgMpN3xN4wLQxP5mSlSousrlGRM0xs608ioio4f0VryfFHnRWkuis4eU1RQEaemCn0T4eYE6bytriPOdP3awVRXdMYUVE0ecnqUDuOjkvx7nNEPlhCeWIyD28lohvX2E6+rz+JC1e1hlbPjE7ZIr+yrbKTp4FXYqbQv5wljGlx7D0xuaDveWIih5OO8JwKwsmLRlwLychkXq6peipwzu06+ag98AoAOwZSUugHU1n84tljAICLV7fhTwdG8eyRpGcylIlDo2kcdJqLCZo8k6EmcwaeHxgPnc0KACPO77pSdY2mKqdcQkkQAvqns4R5
YOdx3Pj5LRgaz06/8xzxvu9txcfv3XXKrxNZ/GLENf/rJzvwkR/vOOXX5Q0LpsXlwCsA/NVX/4Tv/OkwAODDd2/Hj/qOYkN3C157wXIcPpnBG778BA472XymYOIVn3sUb/3G077jeidDHZ/IoWBauGR1W+g53Hj+cgDAGc3RsudJJZTEbKDJUEuYwVQWFgeOj+cqOr255OBoGjOZBCmc/GLENceSM7sJigHUhoiKBk+73acOjeFd167D4ZNpvHrDMvzHX1zkdJZU8c/37cKRMVvkR8p84xEDr97FsntCJkIBwL/efAE++ppzK1bP2E3MTu3aCEJAIr+EGXOij4WKQLIFE6lMEcn4qb/fZM528KlFiGtm+vtJO8rZGNF8C2xv60+iYFgYmcrjvO4WJJzSxwtW2tUxIs7x5veNERWZogldUWT27s3gy1XP6KoS2rjMi6ZQJk/MHBL5JYwQTLF03Hwz6MRCyRm4cenkF1jkOedIOhER5/yUmniJuviGqIq4R+RPpgt46tBJcA6s8AyIBhf88LIq0YBUtgDDdL8GCScf1ZTQ2a7VoqsKFKX2JpoRS4O6sAdD41n8n4f3YTA1/dd2y+L42baBqmdncs5x7/ZjoTXUg6ksHt0zXLL9wV3HZySUQUT0EXSqW/aO4EuP7MeXHtmPJw6MVjzGnw6cxOHRdFXvN5Syl7ObyBkwTAtD41l8+dH9uOeZoxUnORUMy9c3XfDL5warXohjOnJFE/duP1ZyHhM5A6bFYVocE86NJlMwQvcV7B+eRN/hMSnyjRFN9moXM0vv3zEIwD8gWknku9ti6G6N+1r3Rh0n39kUPaWbTxA7k6c6eWJm1IXIpzJF3PHQXjx58OS0+z5zeAx/f88O/Pr5oaqO/fyxcXzo7u24+5mjJc994Xf78M67+nxClkwX8J7vbsXX/nCw+gsogxDMoMh/5Mc78NkH9+CzD+7B//zR9rJiZpgW3v2dPnzh4X1VvZ/3JpnKFvHVRw/gMw/swf/z0+ewb3iq7OumAtcPAIdH07j9B8/iF9uPVfXe0/Hr54fwobu3y1p1eZ6e3414/OO+AXzo7u141rN+q5dP3PcC/v6eHbLrZDyiYm1XIwDgv/92M1piGh7YeRyAfyZqS1wPbS+wKhHHZWvaceW6Dt8Aq3Dys3HxAHD+ihac1908q2MQpy91IfJnL2tGc1STdcyVGHAG6YJiUY6+w/Z+20L27+sfg2lx7PCIiegNXu3xK5GUTt51x3nDxPBkHn/3yvX45z/bgBMTefmeQfacmMRU3qh6YHLQU8WTyhTQ15/ESkfkKl2PyOO95yp+zzMdFA1S7nPzDvSKx+LfQdhnZpgWnj2SxNB4FpN5N5M/ozmGw59+HV553jJs7E3IbwXeuEZV7MVFAKDZM1D76w9diw+84ix89DXn4gu3XCq3Cyc/W5H/2GvPw2f+8uJZHYM4fZmVyDPGPssYe5Ex9hxj7OeMsTbPcx9jjO1njO1hjN0w6zOtgKowXNLTFvqfOogoR6xWhLcesffr6x/zOeZkuoADI+mSYw2N25HHjqOpWTfskk7eI2Qnxu2KjlWJOC5f217y/l7E72OwyhJMEdcAwNFkFruHJvDGjSvR0RiZRuRtQWxr0OW5ivcUv4/ZUu5z85ZsisfiusPO+cXjk0gXTBRNjqNOlUxD1D9JaZOzeEdrXJf18wIR2bQ12mKvKcwn+F5ExBLWs4YgForZOvmHAFzAOb8IwF4AHwMAxtgGALcAOB/AjQC+zBgrne43h2zqTWDPiUlM5CoPUh5zhGz30ERVfVbElPSgY97miH9EVeSNAHAjj7xh4QVnOvxM4Jy7A6+eSEKcw4rWOM5Z1ozGiFpWgMX24+O5qhaHGBzPyojhsT0jsLj9e93Ym6hK5HvaG+S5it9DuW8Zp4r43LYdSfpvtp7fTTJTwNB4FsdSWURUBX39yZIoa5vns9rvRFANkYDIr7FFPqwiRlTaSLFv0Mvm7aIr5Wyd
PEHMhlmJPOf8t5xzoZRPAljlPL4JwN2c8zzn/BCA/QAun817Tcfm3nZwDnzwB89i1+A4AOCBnUO4/QfbcPsPtuHOh/YCsB2hwgCLwxez/Pp5e99v//EQ+k+mccdDezGQzGBoPIebL10BwM7C+w6P4ZfPDeJzv90LTWF43UXdeOrgGP73L55H3jAx6BwfsGOD3zw/hN948v/hiRw+/ZsXkSkY+NRvdsuZmj965gj+uN8dRJ3MGzAcYR6eyONTv96N0am8dLQr2mLQVAWX9LRJAU6mC/jIj3fgH+7ZgeHJHPr6k1AYYFgco1N5PHXwJD74w2fxuQf3SPEzLY7PPbgHR8fsaz13uZ39PvTCCTAGXNqTwKbeBA6Npn0zYf97y0Hc/oNteOiFEzKuWd3egLxhIVMw5LcCcb4/3TqAR/cM4/F9o7j9B9vwhd+FjxN85dED8jP7xbPH8PzAOP57y0EMpezf6+hUAX9393Z5EwnGNeJ3cfOlKzAymcfRsSzufGgvDo2m8Z0/HcY3Hz8kPx9X5P1O/OJVbVAVFtoZMuG0CxYiX2kwVpxbgkSeWETmsoTyHQB+5DxeCVv0BQPOthIYY7cBuA0Aenp6Zvzmm3oTuHxtO544MIrvPRnDp954Ee54aC+GUjnEIyp++dwQ3nxFD4ZSOVy+th1PHRpD3+Ekrj6rEwDwn7/dgwMjaTz0wgkcPpnBt584LOOWN1/Ri+MTtkh+/Q+HsOfEJEan8rjl8tV4/UUr8OyRJL735BHccP5yDKVyWJVogGlxbOtP4q5j42AMeM2F3QCAn2wbwFcfOwDGgP967CA6GiN4+9Vr8Yn7XsBFq1rl+aScssmIpuDgaBr/teUgupqjsopFVH1s6m3HF3+/D1N5Aw/sOo6fbB0AAHQ1RzGQzOIlZ3bgiQMnMZjK4r+2HMTvX7SrgW65fDVWJRqw89g4vvjIfuSKJvpPpvHmK3qxY2Acx1JZnLOsGa1xXU7J39qfxKvPX45MwcCnH3gRpsVx+GQa77h6LQBgQ3cLfvXcEA6OpGVcI75F/Mv9u7C2sxGNUQ1PHDgJYAh/c0UPujwzPSdyRXzmwRfR0RhFwTDx7JEUNvYmcP+OQUQ0Ba/esBwDqQzu3zGI87qb8f6Xn4VUpijXP01lihhIZhHXVbz1yjW4p28AP3zmCL7y6AGkMgXc0zeAeETF269ei288fgjPHxtHQ0SVk5cEjVENt161BueHdI1sk+LuF/sw3vaSNdh+JIVbLls9zb9egpg/pnXyjLHfMcZ2hvy5ybPPPwEwAHxfbAo5VGhewDn/Gud8M+d8c1dX10yuAYBdIXHPe67C1Wd1Ymt/EuPZIvaemMJtL12H/3rrJgC2SA2O2+J1zrJmGbOIfP3Cla3IGxZ+ts0Wyu8/2Y+GiIrzV7TgO++4HK+7sBt/PDCKQ6Np3P6Ks/BvN1+IK9d14P4PXgPG7OMPjWfR3RrDpt4EtuwbwZGxDPpPZqRjF3nx95/sl+e0e2gC2aKJHQNuji/qzdd2NMpr3HYkicFUFokGXdZ1b+pNyG8lW/uTaG+MYEVrDD94yj7+n11sfwsZTOWw7UgSF65sle8LuIOU9/QdRdHkuOasTsR0+5/FRkfcL1zZCl1l8vclmnZdsLIFu4cmcWLCvraXnd0ljy2y+KLJ8dTBk5jI2d0cnz2SkufgjU4AYPuRFDgHvnDLJfjwq87GsVQWjzklqgXDwsbeNvzyg9diXVej/D2OZQpINETQFtcxlilg25EkLl7dig0rWtAU1eTv+efPHkO2aOL/u+l8/O/XnWe3ATYsXNpju/YgH/+zDfiLTatKtovoRcY2jeUXwl7RFsc9770KHU3lWxYQxHwzrchzzl/FOb8g5M+9AMAYuxXA6wG8mbsB6AAAr31ZBWBwrk8+jE09Cew9MYXH9o7YP/cmcP6KVkQ0BVv2jWAyZ6C7LY6NvQk825+EZXE8e9QWjHe/dB0AyMqKiZyBS1a3ySnnG3sTMn/2
Npxqjun2TaM/icFUDiva4tjk2RewhY9zLsVVvMfW/pSs4MkVLewesnN8kTWv62r0HWMo0OLg0p42eYPZ1p/Exp4ENq1px0TOQFRT8KrzlgEAHt8/ilSmiL+5ogeNEVWK5LbA+WzsTUh3utm5xpiu4oKVre5rHHF+97XrYFpcxkzrlzWhuzWGvv4khlJZrG63z/P+5+y4yrA4skUTf3tVLyKqUjJQvtWJly5e3SZ/vxOe36H89tKTkL/PVKaAtgYdbQ06BlNZ7BqcwKbeBFSF4dKeNt9nKT43xhhWeI51KrSdQlxDEEuB2VbX3AjgHwH8Oec843nqPgC3MMaijLG1ANYDeDrsGHONGDT7+h8OQlUYLl7dhoim4OJVrbh/hy023a0xbOpJYDJvYO/wJPoOJ6EpDNeftwyrEvZ/ftFkyivm4rGuMlyw0t8bfPOahPymIJw8YMctEVXB1v4xHBxNI5kpymM3xzSMTuXxi+3H5DYh+KJKxbsY84mJPJ45NOYr62txbjAPvXACB0fT2LwmgU09bQDsbLmzKYK4ruKXzuSey9e245KeNjko2dc/Jt97XVcj2hsjUrh8196TwI6BceQNE32Hx7D+jCa8/JwzANg3kIimIKqp2NibwKMvDiNdMLG5167++eWOQV8FykvO6sQFK1tKSl639idx7nLbgW9Y0SK/UYjzE9e9eU0CyUwRB0fTGHPWTm1vjOCxvSMwLS7fd6Mj4PL1rTF5o+h2jrXpFBezbg/GNZS3E0uc2VbXfBFAM4CHGGPbGWNfBQDO+S4A9wB4AcADAD7AOa9uleRZcslq++v3cwPjOK+7GY2OuGzsTchqGuG0AeDt33oG3/lTP85f0YJ4RJXb/+aKHvk6gajHv2BlaW/wTb0JZAomOLcXgDh3eTMaIiouWtmKC1e14gdPHcFbv/4UADvj9/793MA4XnZ2F1a2xfGFh/fh+jsew6d+8yIAIO68zyvPtQV1Mm+UNCvb2JvA88fG5XlsEiInXGtbDJN5A4kGHes6G7GpJ4EXhibwqjsew4mJvDwP4WoTjTo6GiPo9dxgNvUmUDAs3HDnFjy+fxSbehNojes4e1kTALduXNw8vb+7ybyBK9Z14MyuRixviWGFcxPcfjSF6+94TP558uBJz41UwUWr2hDXVbzxUns4xx2HsPd569efwrNHUmhriKCtIQLxPfJS5yZX6bNc0Rp3BpbbcCrITL7RL/YEsVSZ1cAr5/ysCs99EsAnZ3P8mdAQ0fC/bjgHzw2kcNMl7ljvmzavxmAqh8aIigtXtiKqKXjPy9bJWuk3XGrnr++6Zh3OXd6Cv9y0CqbJcdW6DnkMVWH4f1+/AWe0lGas1527DH+xcRVMy8Irzz0Dmqrg46/fgBVtcZgWx4+32jNmX9cWx/tefiYKhoX3v+JMFE0LxydyePvVa3HduWn8bvcJecye9ka869p1SOcN/N0r1+OOh/ZieDKPvwxkxW+5ohcT2SLaGnRcsroNCmP4wCvOxJs224nZ+15+Fn7/4glcu74LjDH8xaZVOHwyA8OycNGqNrzjmjVoiqp4hXMjedc16zCeLfpKA196dhfetHkVpvIGLljZirdcad8YPvyqs/HL5wale379Rd143hlsfv2F3RhMZXHkZAZ/e1Uvkpki8oYJxhj+x2U9OD6Rh2m5cwnOWd4sBRkA/u669TgylsFVZ3agtSEiSxrP7GrCu69dK8sz37R5NRhj0FWGc5e3SCG+Yl073n3tWrzz6rVob4jgCs9n+TdX9ODc7ha0xE5NpK8+qwO3vXQdXnXeMnzolevxWmdAnSCWKmwpLby8efNm3tfXt9inQRAEUVMwxrZyzjeHPVcXbQ0IgiCIcEjkCYIg6hgSeYIgiDqGRJ4gCKKOIZEnCIKoY0jkCYIg6hgSeYIgiDqGRJ4gCKKOWVKToRhjIwD6Z3GITgCVV7auTer1ugC6tlqFrm1p0cs5D23ju6REfrYwxvrKzfqqZer1ugC6
tlqFrq12oLiGIAiijiGRJwiCqGPqTeS/ttgnME/U63UBdG21Cl1bjVBXmTxBEAThp96cPEEQBOGBRJ4gCKKOqQuRZ4zdyBjbwxjbzxj76GKfz2xhjB1mjD3vLKnY52xrZ4w9xBjb5/x9aitQLxKMsW8yxoYZYzs928peC2PsY87nuIcxdsPinHV1lLm2TzDGjjmf3XbG2Gs9z9XEtTHGVjPGHmGM7WaM7WKMfcjZXvOfW4Vrq/nPrSyc85r+A0AFcADAOgARADsAbFjs85rlNR0G0BnY9hkAH3UefxTAfyz2eVZ5LS8FsBHAzumuBcAG5/OLAljrfK7qYl/DKV7bJwB8JGTfmrk2AN0ANjqPmwHsdc6/5j+3CtdW859buT/14OQvB7Cfc36Qc14AcDeAmxb5nOaDmwDc5Ty+C8DNi3cq1cM53wJgLLC53LXcBOBuznmec34IwH7Yn++SpMy1laNmro1zPsQ53+Y8ngSwG8BK1MHnVuHaylEz11aOehD5lQCOen4eQOUPrRbgAH7LGNvKGLvN2baMcz4E2P9QAZyxaGc3e8pdS718lrczxp5z4hwRadTktTHG1gC4FMBTqLPPLXBtQB19bl7qQeRZyLZarwu9mnO+EcBrAHyAMfbSxT6hBaIePsuvADgTwCUAhgD8p7O95q6NMdYE4KcAPsw5n6i0a8i2Wru2uvncgtSDyA8AWO35eRWAwUU6lzmBcz7o/D0M4Oewvx6eYIx1A4Dz9/DineGsKXctNf9Zcs5PcM5NzrkF4L/hfrWvqWtjjOmwRfD7nPOfOZvr4nMLu7Z6+dzCqAeRfwbAesbYWsZYBMAtAO5b5HOaMYyxRsZYs3gM4NUAdsK+plud3W4FcO/inOGcUO5a7gNwC2MsyhhbC2A9gKcX4fxmjBBBhzfA/uyAGro2xhgD8A0Auznnd3ieqvnPrdy11cPnVpbFHvmdiz8AXgt7lPwAgH9a7POZ5bWsgz2avwPALnE9ADoAPAxgn/N3+2Kfa5XX80PYX3+LsF3ROytdC4B/cj7HPQBes9jnP4Nr+y6A5wE8B1sgumvt2gBcAzuSeA7AdufPa+vhc6twbTX/uZX7Q20NCIIg6ph6iGsIgiCIMpDIEwRB1DEk8gRBEHUMiTxBEEQdQyJPEARRx5DIEwRB1DEk8gRBEHXM/wXvS0lqXQWPbQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-5:]) > 16.:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
